Move from_iter_values to GenericByteArray (#4586)
diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs
index 5483960..67be376 100644
--- a/arrow-array/src/array/binary_array.rs
+++ b/arrow-array/src/array/binary_array.rs
@@ -19,7 +19,6 @@
use crate::{
Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait,
};
-use arrow_buffer::MutableBuffer;
use arrow_data::ArrayData;
use arrow_schema::DataType;
@@ -83,42 +82,6 @@
Self::from(data)
}
- /// Creates a [`GenericBinaryArray`] based on an iterator of values without nulls
- pub fn from_iter_values<Ptr, I>(iter: I) -> Self
- where
- Ptr: AsRef<[u8]>,
- I: IntoIterator<Item = Ptr>,
- {
- let iter = iter.into_iter();
- let (_, data_len) = iter.size_hint();
- let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
- let mut offsets =
- MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
- let mut values = MutableBuffer::new(0);
-
- let mut length_so_far = OffsetSize::zero();
- offsets.push(length_so_far);
-
- for s in iter {
- let s = s.as_ref();
- length_so_far += OffsetSize::from_usize(s.len()).unwrap();
- offsets.push(length_so_far);
- values.extend_from_slice(s);
- }
-
- // iterator size hint may not be correct so compute the actual number of offsets
- assert!(!offsets.is_empty()); // wrote at least one
- let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;
-
- let array_data = ArrayData::builder(Self::DATA_TYPE)
- .len(actual_len)
- .add_buffer(offsets.into())
- .add_buffer(values.into());
- let array_data = unsafe { array_data.build_unchecked() };
- Self::from(array_data)
- }
-
/// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
pub fn take_iter<'a>(
&'a self,
diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index be10a45..f694aa3 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -182,6 +182,41 @@
}
}
+ /// Creates a [`GenericByteArray`] from an iterator of non-null values; panics if the iterator reports no upper size bound or the total byte length does not fit in `T::Offset`
+ pub fn from_iter_values<Ptr, I>(iter: I) -> Self
+ where
+ Ptr: AsRef<T::Native>,
+ I: IntoIterator<Item = Ptr>,
+ {
+ let iter = iter.into_iter();
+ let (_, data_len) = iter.size_hint();
+ let data_len = data_len.expect("Iterator must be sized"); // panics when `size_hint` reports no upper bound
+
+ let mut offsets =
+ MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>()); // sized for `data_len + 1` offsets: the leading 0 plus one end offset per value
+ offsets.push(T::Offset::usize_as(0));
+
+ let mut values = MutableBuffer::new(0);
+ for s in iter {
+ let s: &[u8] = s.as_ref().as_ref(); // Ptr -> &T::Native -> &[u8]
+ values.extend_from_slice(s);
+ offsets.push(T::Offset::usize_as(values.len())); // end offset of this value == total bytes written so far
+ }
+
+ T::Offset::from_usize(values.len()).expect("offset overflow"); // `values.len()` only grows, so validating the final offset covers every offset pushed above
+ let offsets = Buffer::from(offsets);
+
+ // SAFETY: offsets start at 0, never decrease, and the largest (final) one was checked above to fit in `T::Offset`
+ let value_offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
+
+ Self {
+ data_type: T::DATA_TYPE,
+ value_data: values.into(),
+ value_offsets,
+ nulls: None,
+ }
+ }
+
/// Deconstruct this array into its constituent parts
pub fn into_parts(self) -> (OffsetBuffer<T::Offset>, Buffer, Option<NullBuffer>) {
(self.value_offsets, self.value_data, self.nulls)
diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index f9a3a5f..4c40e8b 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -17,8 +17,6 @@
use crate::types::GenericStringType;
use crate::{GenericBinaryArray, GenericByteArray, GenericListArray, OffsetSizeTrait};
-use arrow_buffer::MutableBuffer;
-use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
/// A [`GenericByteArray`] for storing `str`
@@ -40,42 +38,6 @@
self.value(i).chars().count()
}
- /// Creates a [`GenericStringArray`] based on an iterator of values without nulls
- pub fn from_iter_values<Ptr, I>(iter: I) -> Self
- where
- Ptr: AsRef<str>,
- I: IntoIterator<Item = Ptr>,
- {
- let iter = iter.into_iter();
- let (_, data_len) = iter.size_hint();
- let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
- let mut offsets =
- MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
- let mut values = MutableBuffer::new(0);
-
- let mut length_so_far = OffsetSize::zero();
- offsets.push(length_so_far);
-
- for i in iter {
- let s = i.as_ref();
- length_so_far += OffsetSize::from_usize(s.len()).unwrap();
- offsets.push(length_so_far);
- values.extend_from_slice(s.as_bytes());
- }
-
- // iterator size hint may not be correct so compute the actual number of offsets
- assert!(!offsets.is_empty()); // wrote at least one
- let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;
-
- let array_data = ArrayData::builder(Self::DATA_TYPE)
- .len(actual_len)
- .add_buffer(offsets.into())
- .add_buffer(values.into());
- let array_data = unsafe { array_data.build_unchecked() };
- Self::from(array_data)
- }
-
/// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
pub fn take_iter<'a>(
&'a self,
@@ -210,6 +172,7 @@
use crate::types::UInt8Type;
use crate::Array;
use arrow_buffer::Buffer;
+ use arrow_data::ArrayData;
use arrow_schema::Field;
use std::sync::Arc;