ARROW-11799: [Rust] fix len of string and binary arrays created from unbound iterator

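While looking for a way to make loading array data from parquet files faster, I stumbled on an edge case where string and binary arrays are created with an incorrect length: when an iterator's declared upper size bound (from `size_hint()`) is larger than the number of items it actually yields, the array length was taken from that upper bound instead of the real item count.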

Here is an example of such an iterator:

```
 // iterator that doesn't declare (upper) size bound
let string_iter = (0..).scan(0usize, |pos, i| {
    if *pos < 10 {
        *pos += 1;
        Some(Some(format!("value {}", i)))
    }
    else {
         // actually returns up to 10 values
         None
     }
})
// limited using take()
.take(100);
```

For more details, please see the new tests added in this PR.
Fortunately, this is easy to fix: the array length is now derived from the number of offsets actually written (number of offsets minus one) rather than from the iterator's upper bound.

@jorgecarleitao

Closes #9588 from yordan-pavlov/fix_array_len_from_unbound_iter

Authored-by: Yordan Pavlov <yordan.pavlov@outlook.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs
index 0af194e..042ab55 100644
--- a/rust/arrow/src/array/array_binary.rs
+++ b/rust/arrow/src/array/array_binary.rs
@@ -258,6 +258,8 @@
             }
         }
 
+        // calculate actual data_len, which may be different from the iterator's upper bound
+        let data_len = offsets.len() - 1;
         let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
             .len(data_len)
             .add_buffer(Buffer::from_slice_ref(&offsets))
@@ -872,6 +874,30 @@
     }
 
     #[test]
+    fn test_binary_array_from_unbound_iter() {
+        // iterator that doesn't declare (upper) size bound
+        let value_iter = (0..)
+            .scan(0usize, |pos, i| {
+                if *pos < 10 {
+                    *pos += 1;
+                    Some(Some(format!("value {}", i)))
+                } else {
+                    // actually returns up to 10 values
+                    None
+                }
+            })
+            // limited using take()
+            .take(100);
+
+        let (_, upper_size_bound) = value_iter.size_hint();
+        // the upper bound, defined by take above, is 100
+        assert_eq!(upper_size_bound, Some(100));
+        let binary_array: BinaryArray = value_iter.collect();
+        // but the actual number of items in the array should be 10
+        assert_eq!(binary_array.len(), 10);
+    }
+
+    #[test]
     #[should_panic(
         expected = "assertion failed: `(left == right)`\n  left: `UInt32`,\n \
                     right: `UInt8`: BinaryArray can only be created from List<u8> arrays, \
diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
index 0bde7bc..6afe3e7 100644
--- a/rust/arrow/src/array/array_primitive.rs
+++ b/rust/arrow/src/array/array_primitive.rs
@@ -841,7 +841,6 @@
     #[test]
     fn test_primitive_from_iter_values() {
         // Test building a primitive array with from_iter_values
-
         let arr: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(0..10);
         assert_eq!(10, arr.len());
         assert_eq!(0, arr.null_count());
@@ -851,6 +850,30 @@
     }
 
     #[test]
+    fn test_primitive_array_from_unbound_iter() {
+        // iterator that doesn't declare (upper) size bound
+        let value_iter = (0..)
+            .scan(0usize, |pos, i| {
+                if *pos < 10 {
+                    *pos += 1;
+                    Some(Some(i))
+                } else {
+                    // actually returns up to 10 values
+                    None
+                }
+            })
+            // limited using take()
+            .take(100);
+
+        let (_, upper_size_bound) = value_iter.size_hint();
+        // the upper bound, defined by take above, is 100
+        assert_eq!(upper_size_bound, Some(100));
+        let primitive_array: PrimitiveArray<Int32Type> = value_iter.collect();
+        // but the actual number of items in the array should be 10
+        assert_eq!(primitive_array.len(), 10);
+    }
+
+    #[test]
     #[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
                                (values buffer)")]
     fn test_primitive_array_invalid_buffer_len() {
diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs
index fbce81e..588e5a0 100644
--- a/rust/arrow/src/array/array_string.rs
+++ b/rust/arrow/src/array/array_string.rs
@@ -205,8 +205,8 @@
         let (_, data_len) = iter.size_hint();
         let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
 
-        let mut offsets =
-            MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
+        let offset_size = std::mem::size_of::<OffsetSize>();
+        let mut offsets = MutableBuffer::new((data_len + 1) * offset_size);
         let mut values = MutableBuffer::new(0);
         let mut null_buf = MutableBuffer::new_null(data_len);
         let null_slice = null_buf.as_slice_mut();
@@ -214,19 +214,21 @@
         offsets.push(length_so_far);
 
         for (i, s) in iter.enumerate() {
-            if let Some(s) = s {
-                let s = s.as_ref();
+            let value_bytes = if let Some(ref s) = s {
                 // set null bit
                 bit_util::set_bit(null_slice, i);
-
-                length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-                values.extend_from_slice(s.as_bytes());
+                let s_bytes = s.as_ref().as_bytes();
+                length_so_far += OffsetSize::from_usize(s_bytes.len()).unwrap();
+                s_bytes
             } else {
-                values.extend_from_slice(b"");
-            }
+                b""
+            };
+            values.extend_from_slice(value_bytes);
             offsets.push(length_so_far);
         }
 
+        // calculate actual data_len, which may be different from the iterator's upper bound
+        let data_len = (offsets.len() / offset_size) - 1;
         let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
             .len(data_len)
             .add_buffer(offsets.into())
@@ -490,4 +492,28 @@
         assert_eq!(array1.value(0), "hello");
         assert_eq!(array1.value(1), "hello2");
     }
+
+    #[test]
+    fn test_string_array_from_unbound_iter() {
+        // iterator that doesn't declare (upper) size bound
+        let string_iter = (0..)
+            .scan(0usize, |pos, i| {
+                if *pos < 10 {
+                    *pos += 1;
+                    Some(Some(format!("value {}", i)))
+                } else {
+                    // actually returns up to 10 values
+                    None
+                }
+            })
+            // limited using take()
+            .take(100);
+
+        let (_, upper_size_bound) = string_iter.size_hint();
+        // the upper bound, defined by take above, is 100
+        assert_eq!(upper_size_bound, Some(100));
+        let string_array: StringArray = string_iter.collect();
+        // but the actual number of items in the array should be 10
+        assert_eq!(string_array.len(), 10);
+    }
 }