| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use arrow::array::{ |
| make_array, Array, BooleanBuilder, Decimal128Builder, Int32Array, Int32Builder, Int64Array, |
| StringArray, StructBuilder, UInt64Array, |
| }; |
| use arrow_array::Decimal128Array; |
| use arrow_buffer::{ArrowNativeType, Buffer}; |
| use arrow_data::ArrayData; |
| use arrow_schema::{DataType, Field, UnionFields, UnionMode}; |
| use std::ptr::NonNull; |
| use std::sync::Arc; |
| |
| #[test] |
| #[should_panic(expected = "Need at least 80 bytes in buffers[0] in array of type Int64, but got 8")] |
| fn test_buffer_too_small() { |
| let buffer = Buffer::from_slice_ref([0i32, 2i32]); |
| // should fail as the declared size (10*8 = 80) is larger than the underlying bfufer (8) |
| ArrayData::try_new(DataType::Int64, 10, None, 0, vec![buffer], vec![]).unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Need at least 16 bytes in buffers[0] in array of type Int64, but got 8")] |
| fn test_buffer_too_small_offset() { |
| let buffer = Buffer::from_slice_ref([0i32, 2i32]); |
| // should fail -- size is ok, but also has offset |
| ArrayData::try_new(DataType::Int64, 1, None, 1, vec![buffer], vec![]).unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Expected 1 buffers in array of type Int64, got 2")] |
| fn test_bad_number_of_buffers() { |
| let buffer1 = Buffer::from_slice_ref([0i32, 2i32]); |
| let buffer2 = Buffer::from_slice_ref([0i32, 2i32]); |
| ArrayData::try_new(DataType::Int64, 1, None, 0, vec![buffer1, buffer2], vec![]).unwrap(); |
| } |
| |
| #[test] |
| #[should_panic( |
| expected = "Need at least 18446744073709551615 bytes in buffers[0] in array of type Int64, but got 8" |
| )] |
| fn test_fixed_width_overflow() { |
| let buffer = Buffer::from_slice_ref([0i32, 2i32]); |
| ArrayData::try_new(DataType::Int64, usize::MAX, None, 0, vec![buffer], vec![]).unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "null_bit_buffer size too small. got 1 needed 2")] |
| fn test_bitmap_too_small() { |
| let buffer = make_i32_buffer(9); |
| let null_bit_buffer = Buffer::from(vec![0b11111111]); |
| |
| ArrayData::try_new( |
| DataType::Int32, |
| 9, |
| Some(null_bit_buffer), |
| 0, |
| vec![buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| // Test creating a dictionary with a non integer type |
| #[test] |
| #[should_panic(expected = "Dictionary key type must be integer, but was Utf8")] |
| fn test_non_int_dictionary() { |
| let i32_buffer = Buffer::from_slice_ref([0i32, 2i32]); |
| let data_type = DataType::Dictionary(Box::new(DataType::Utf8), Box::new(DataType::Int32)); |
| let child_data = ArrayData::try_new( |
| DataType::Int32, |
| 1, |
| None, |
| 0, |
| vec![i32_buffer.clone()], |
| vec![], |
| ) |
| .unwrap(); |
| ArrayData::try_new( |
| data_type, |
| 1, |
| None, |
| 0, |
| vec![i32_buffer.clone(), i32_buffer], |
| vec![child_data], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Expected LargeUtf8 but child data had Utf8")] |
| fn test_mismatched_dictionary_types() { |
| // test w/ dictionary created with a child array data that has type different than declared |
| let string_array: StringArray = vec![Some("foo"), Some("bar")].into_iter().collect(); |
| let i32_buffer = Buffer::from_slice_ref([0i32, 1i32]); |
| // Dict says LargeUtf8 but array is Utf8 |
| let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::LargeUtf8)); |
| let child_data = string_array.into_data(); |
| ArrayData::try_new(data_type, 1, None, 0, vec![i32_buffer], vec![child_data]).unwrap(); |
| } |
| |
| #[test] |
| fn test_empty_utf8_array_with_empty_offsets_buffer() { |
| let data_buffer = Buffer::from(&[]); |
| let offsets_buffer = Buffer::from(&[]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 0, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| fn test_empty_utf8_array_with_single_zero_offset() { |
| let data_buffer = Buffer::from(&[]); |
| let offsets_buffer = Buffer::from_slice_ref([0i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 0, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "First offset 1 of Utf8 is larger than values length 0")] |
| fn test_empty_utf8_array_with_invalid_offset() { |
| let data_buffer = Buffer::from(&[]); |
| let offsets_buffer = Buffer::from_slice_ref([1i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 0, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| fn test_empty_utf8_array_with_non_zero_offset() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| let offsets_buffer = Buffer::from_slice_ref([0i32, 2, 6, 0]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 0, |
| None, |
| 3, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Buffer 0 of LargeUtf8 isn't large enough. Expected 8 bytes got 4")] |
| fn test_empty_large_utf8_array_with_wrong_type_offsets() { |
| let data_buffer = Buffer::from(&[]); |
| let offsets_buffer = Buffer::from_slice_ref([0i32]); |
| ArrayData::try_new( |
| DataType::LargeUtf8, |
| 0, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Buffer 0 of Utf8 isn't large enough. Expected 12 bytes got 8")] |
| fn test_validate_offsets_i32() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| let offsets_buffer = Buffer::from_slice_ref([0i32, 2i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Buffer 0 of LargeUtf8 isn't large enough. Expected 24 bytes got 16")] |
| fn test_validate_offsets_i64() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| let offsets_buffer = Buffer::from_slice_ref([0i64, 2i64]); |
| ArrayData::try_new( |
| DataType::LargeUtf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Error converting offset[0] (-2) to usize for Utf8")] |
| fn test_validate_offsets_negative_first_i32() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| let offsets_buffer = Buffer::from_slice_ref([-2i32, 1i32, 3i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Error converting offset[2] (-3) to usize for Utf8")] |
| fn test_validate_offsets_negative_last_i32() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| let offsets_buffer = Buffer::from_slice_ref([0i32, 2i32, -3i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "First offset 4 in Utf8 is smaller than last offset 3")] |
| fn test_validate_offsets_range_too_small() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| // start offset is larger than end |
| let offsets_buffer = Buffer::from_slice_ref([4i32, 2i32, 3i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Last offset 10 of Utf8 is larger than values length 6")] |
| fn test_validate_offsets_range_too_large() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| // 10 is off the end of the buffer |
| let offsets_buffer = Buffer::from_slice_ref([0i32, 2i32, 10i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "First offset 10 of Utf8 is larger than values length 6")] |
| fn test_validate_offsets_first_too_large() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| // 10 is off the end of the buffer |
| let offsets_buffer = Buffer::from_slice_ref([10i32, 2i32, 10i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| fn test_validate_offsets_first_too_large_skipped() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| // 10 is off the end of the buffer, but offset starts at 1 so it is skipped |
| let offsets_buffer = Buffer::from_slice_ref([10i32, 2i32, 3i32, 4i32]); |
| let data = ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 1, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| let array: StringArray = data.into(); |
| let expected: StringArray = vec![Some("c"), Some("d")].into_iter().collect(); |
| assert_eq!(array, expected); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Last offset 8 of Utf8 is larger than values length 6")] |
| fn test_validate_offsets_last_too_large() { |
| let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes()); |
| // 10 is off the end of the buffer |
| let offsets_buffer = Buffer::from_slice_ref([5i32, 7i32, 8i32]); |
| ArrayData::try_new( |
| DataType::Utf8, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| /// Test that the list of type `data_type` generates correct offset and size out of bounds errors |
| fn check_list_view_offsets_sizes<T: ArrowNativeType>( |
| data_type: DataType, |
| offsets: Vec<T>, |
| sizes: Vec<T>, |
| ) { |
| let values: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect(); |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| let sizes_buffer = Buffer::from_slice_ref(sizes); |
| ArrayData::try_new( |
| data_type, |
| 4, |
| None, |
| 0, |
| vec![offsets_buffer, sizes_buffer], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Size 3 at index 3 is larger than the remaining values for ListView")] |
| fn test_validate_list_view_offsets_sizes() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_view_offsets_sizes::<i32>( |
| DataType::ListView(Arc::new(field_type)), |
| vec![0, 1, 1, 2], |
| vec![1, 1, 1, 3], |
| ); |
| } |
| |
| #[test] |
| #[should_panic( |
| expected = "Size 3 at index 3 is larger than the remaining values for LargeListView" |
| )] |
| fn test_validate_large_list_view_offsets_sizes() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_view_offsets_sizes::<i64>( |
| DataType::LargeListView(Arc::new(field_type)), |
| vec![0, 1, 1, 2], |
| vec![1, 1, 1, 3], |
| ); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Error converting offset[1] (-1) to usize for ListView")] |
| fn test_validate_list_view_negative_offsets() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_view_offsets_sizes::<i32>( |
| DataType::ListView(Arc::new(field_type)), |
| vec![0, -1, 1, 2], |
| vec![1, 1, 1, 3], |
| ); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Error converting size[2] (-1) to usize for ListView")] |
| fn test_validate_list_view_negative_sizes() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_view_offsets_sizes::<i32>( |
| DataType::ListView(Arc::new(field_type)), |
| vec![0, 1, 1, 2], |
| vec![1, 1, -1, 3], |
| ); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Error converting offset[1] (-1) to usize for LargeListView")] |
| fn test_validate_large_list_view_negative_offsets() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_view_offsets_sizes::<i64>( |
| DataType::LargeListView(Arc::new(field_type)), |
| vec![0, -1, 1, 2], |
| vec![1, 1, 1, 3], |
| ); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Error converting size[2] (-1) to usize for LargeListView")] |
| fn test_validate_large_list_view_negative_sizes() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_view_offsets_sizes::<i64>( |
| DataType::LargeListView(Arc::new(field_type)), |
| vec![0, 1, 1, 2], |
| vec![1, 1, -1, 3], |
| ); |
| } |
| |
| #[test] |
| #[should_panic( |
| expected = "Values length 4 is less than the length (2) multiplied by the value size (2) for FixedSizeList" |
| )] |
| fn test_validate_fixed_size_list() { |
| // child has 4 elements, |
| let child_array = vec![Some(1), Some(2), Some(3), None] |
| .into_iter() |
| .collect::<Int32Array>(); |
| |
| // but claim we have 3 elements for a fixed size of 2 |
| // 10 is off the end of the buffer |
| let field = Field::new("field", DataType::Int32, true); |
| ArrayData::try_new( |
| DataType::FixedSizeList(Arc::new(field), 2), |
| 3, |
| None, |
| 0, |
| vec![], |
| vec![child_array.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Child type mismatch for Struct")] |
| fn test_validate_struct_child_type() { |
| let field1 = vec![Some(1), Some(2), Some(3), None] |
| .into_iter() |
| .collect::<Int32Array>(); |
| |
| // validate the the type of struct fields matches child fields |
| ArrayData::try_new( |
| DataType::Struct(vec![Field::new("field1", DataType::Int64, true)].into()), |
| 3, |
| None, |
| 0, |
| vec![], |
| vec![field1.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic( |
| expected = "child array #0 for field field1 has length smaller than expected for struct array (4 < 6)" |
| )] |
| fn test_validate_struct_child_length() { |
| // field length only has 4 items, but array claims to have 6 |
| let field1 = vec![Some(1), Some(2), Some(3), None] |
| .into_iter() |
| .collect::<Int32Array>(); |
| |
| ArrayData::try_new( |
| DataType::Struct(vec![Field::new("field1", DataType::Int32, true)].into()), |
| 6, |
| None, |
| 0, |
| vec![], |
| vec![field1.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| /// Test that the array of type `data_type` that has invalid utf8 data errors |
| fn check_utf8_validation<T: ArrowNativeType>(data_type: DataType) { |
| // 0x80 is a utf8 continuation sequence and is not a valid utf8 sequence itself |
| let data_buffer = Buffer::from_slice_ref([b'a', b'a', 0x80, 0x00]); |
| let offsets: Vec<T> = [0, 2, 3] |
| .iter() |
| .map(|&v| T::from_usize(v).unwrap()) |
| .collect(); |
| |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| ArrayData::try_new( |
| data_type, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Invalid UTF8 sequence at string index 1 (2..3)")] |
| fn test_validate_utf8_content() { |
| check_utf8_validation::<i32>(DataType::Utf8); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Invalid UTF8 sequence at string index 1 (2..3)")] |
| fn test_validate_large_utf8_content() { |
| check_utf8_validation::<i64>(DataType::LargeUtf8); |
| } |
| |
| /// Tests that offsets are at valid codepoint boundaries |
| fn check_utf8_char_boundary<T: ArrowNativeType>(data_type: DataType) { |
| let data_buffer = Buffer::from("🙀".as_bytes()); |
| let offsets: Vec<T> = [0, 1, data_buffer.len()] |
| .iter() |
| .map(|&v| T::from_usize(v).unwrap()) |
| .collect(); |
| |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| ArrayData::try_new( |
| data_type, |
| 2, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "incomplete utf-8 byte sequence from index 0")] |
| fn test_validate_utf8_char_boundary() { |
| check_utf8_char_boundary::<i32>(DataType::Utf8); |
| } |
| |
| #[test] |
| #[should_panic(expected = "incomplete utf-8 byte sequence from index 0")] |
| fn test_validate_large_utf8_char_boundary() { |
| check_utf8_char_boundary::<i64>(DataType::LargeUtf8); |
| } |
| |
| /// Test that the array of type `data_type` that has invalid indexes (out of bounds) |
| fn check_index_out_of_bounds_validation<T: ArrowNativeType>(data_type: DataType) { |
| let data_buffer = Buffer::from_slice_ref([b'a', b'b', b'c', b'd']); |
| // First two offsets are fine, then 5 is out of bounds |
| let offsets: Vec<T> = [0, 1, 2, 5, 2] |
| .iter() |
| .map(|&v| T::from_usize(v).unwrap()) |
| .collect(); |
| |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| ArrayData::try_new( |
| data_type, |
| 4, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: offset at position 3 out of bounds: 5 > 4")] |
| fn test_validate_utf8_out_of_bounds() { |
| check_index_out_of_bounds_validation::<i32>(DataType::Utf8); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: offset at position 3 out of bounds: 5 > 4")] |
| fn test_validate_large_utf8_out_of_bounds() { |
| check_index_out_of_bounds_validation::<i64>(DataType::LargeUtf8); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: offset at position 3 out of bounds: 5 > 4")] |
| fn test_validate_binary_out_of_bounds() { |
| check_index_out_of_bounds_validation::<i32>(DataType::Binary); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: offset at position 3 out of bounds: 5 > 4")] |
| fn test_validate_large_binary_out_of_bounds() { |
| check_index_out_of_bounds_validation::<i64>(DataType::LargeBinary); |
| } |
| |
| // validate that indexes don't go bacwards check indexes that go backwards |
| fn check_index_backwards_validation<T: ArrowNativeType>(data_type: DataType) { |
| let data_buffer = Buffer::from_slice_ref([b'a', b'b', b'c', b'd']); |
| // First three offsets are fine, then 1 goes backwards |
| let offsets: Vec<T> = [0, 1, 2, 2, 1] |
| .iter() |
| .map(|&v| T::from_usize(v).unwrap()) |
| .collect(); |
| |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| ArrayData::try_new( |
| data_type, |
| 4, |
| None, |
| 0, |
| vec![offsets_buffer, data_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: non-monotonic offset at slot 3: 2 > 1")] |
| fn test_validate_utf8_index_backwards() { |
| check_index_backwards_validation::<i32>(DataType::Utf8); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: non-monotonic offset at slot 3: 2 > 1")] |
| fn test_validate_large_utf8_index_backwards() { |
| check_index_backwards_validation::<i64>(DataType::LargeUtf8); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: non-monotonic offset at slot 3: 2 > 1")] |
| fn test_validate_binary_index_backwards() { |
| check_index_backwards_validation::<i32>(DataType::Binary); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: non-monotonic offset at slot 3: 2 > 1")] |
| fn test_validate_large_binary_index_backwards() { |
| check_index_backwards_validation::<i64>(DataType::LargeBinary); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Value at position 1 out of bounds: 3 (should be in [0, 1])")] |
| fn test_validate_dictionary_index_too_large() { |
| let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect(); |
| |
| // 3 is not a valid index into the values (only 0 and 1) |
| let keys: Int32Array = [Some(1), Some(3)].into_iter().collect(); |
| |
| let data_type = DataType::Dictionary( |
| Box::new(keys.data_type().clone()), |
| Box::new(values.data_type().clone()), |
| ); |
| |
| ArrayData::try_new( |
| data_type, |
| 2, |
| None, |
| 0, |
| vec![keys.into_data().buffers()[0].clone()], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Value at position 1 out of bounds: -1 (should be in [0, 1]")] |
| fn test_validate_dictionary_index_negative() { |
| let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect(); |
| |
| // -1 is not a valid index at all! |
| let keys: Int32Array = [Some(1), Some(-1)].into_iter().collect(); |
| |
| let data_type = DataType::Dictionary( |
| Box::new(keys.data_type().clone()), |
| Box::new(values.data_type().clone()), |
| ); |
| |
| ArrayData::try_new( |
| data_type, |
| 2, |
| None, |
| 0, |
| vec![keys.into_data().buffers()[0].clone()], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| fn test_validate_dictionary_index_negative_but_not_referenced() { |
| let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect(); |
| |
| // -1 is not a valid index at all, but the array is length 1 |
| // so the -1 should not be looked at |
| let keys: Int32Array = [Some(1), Some(-1)].into_iter().collect(); |
| |
| let data_type = DataType::Dictionary( |
| Box::new(keys.data_type().clone()), |
| Box::new(values.data_type().clone()), |
| ); |
| |
| // Expect this not to panic |
| ArrayData::try_new( |
| data_type, |
| 1, |
| None, |
| 0, |
| vec![keys.into_data().buffers()[0].clone()], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic( |
| expected = "Value at position 0 out of bounds: 18446744073709551615 (can not convert to i64)" |
| )] |
| fn test_validate_dictionary_index_giant_negative() { |
| let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect(); |
| |
| // -1 is not a valid index at all! |
| let keys: UInt64Array = [Some(u64::MAX), Some(1)].into_iter().collect(); |
| |
| let data_type = DataType::Dictionary( |
| Box::new(keys.data_type().clone()), |
| Box::new(values.data_type().clone()), |
| ); |
| |
| ArrayData::try_new( |
| data_type, |
| 2, |
| None, |
| 0, |
| vec![keys.into_data().buffers()[0].clone()], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| /// Test that the list of type `data_type` generates correct offset out of bounds errors |
| fn check_list_offsets<T: ArrowNativeType>(data_type: DataType) { |
| let values: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect(); |
| |
| // 5 is an invalid offset into a list of only three values |
| let offsets: Vec<T> = [0, 2, 5, 4] |
| .iter() |
| .map(|&v| T::from_usize(v).unwrap()) |
| .collect(); |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| |
| ArrayData::try_new( |
| data_type, |
| 3, |
| None, |
| 0, |
| vec![offsets_buffer], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: offset at position 2 out of bounds: 5 > 4")] |
| fn test_validate_list_offsets() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_offsets::<i32>(DataType::List(Arc::new(field_type))); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Offset invariant failure: offset at position 2 out of bounds: 5 > 4")] |
| fn test_validate_large_list_offsets() { |
| let field_type = Field::new("f", DataType::Int32, true); |
| check_list_offsets::<i64>(DataType::LargeList(Arc::new(field_type))); |
| } |
| |
| /// Test that the list of type `data_type` generates correct errors for negative offsets |
| #[test] |
| #[should_panic( |
| expected = "Offset invariant failure: Could not convert offset -1 to usize at position 2" |
| )] |
| fn test_validate_list_negative_offsets() { |
| let values: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect(); |
| let field_type = Field::new("f", values.data_type().clone(), true); |
| let data_type = DataType::List(Arc::new(field_type)); |
| |
| // -1 is an invalid offset any way you look at it |
| let offsets: Vec<i32> = vec![0, 2, -1, 4]; |
| let offsets_buffer = Buffer::from_slice_ref(offsets); |
| |
| ArrayData::try_new( |
| data_type, |
| 3, |
| None, |
| 0, |
| vec![offsets_buffer], |
| vec![values.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| /// returns a buffer initialized with some constant value for tests |
| fn make_i32_buffer(n: usize) -> Buffer { |
| Buffer::from_slice_ref(vec![42i32; n]) |
| } |
| |
| #[test] |
| #[should_panic(expected = "Expected Int64 but child data had Int32")] |
| fn test_validate_union_different_types() { |
| let field1 = vec![Some(1), Some(2)].into_iter().collect::<Int32Array>(); |
| |
| let field2 = vec![Some(1), Some(2)].into_iter().collect::<Int32Array>(); |
| |
| let type_ids = Buffer::from_slice_ref([0i8, 1i8]); |
| |
| ArrayData::try_new( |
| DataType::Union( |
| UnionFields::new( |
| vec![0, 1], |
| vec![ |
| Field::new("field1", DataType::Int32, true), |
| Field::new("field2", DataType::Int64, true), // data is int32 |
| ], |
| ), |
| UnionMode::Sparse, |
| ), |
| 2, |
| None, |
| 0, |
| vec![type_ids], |
| vec![field1.into_data(), field2.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| // sparse with wrong sized children |
| #[test] |
| #[should_panic( |
| expected = "Sparse union child array #1 has length smaller than expected for union array (1 < 2)" |
| )] |
| fn test_validate_union_sparse_different_child_len() { |
| let field1 = vec![Some(1), Some(2)].into_iter().collect::<Int32Array>(); |
| |
| // field 2 only has 1 item but array should have 2 |
| let field2 = vec![Some(1)].into_iter().collect::<Int64Array>(); |
| |
| let type_ids = Buffer::from_slice_ref([0i8, 1i8]); |
| |
| ArrayData::try_new( |
| DataType::Union( |
| UnionFields::new( |
| vec![0, 1], |
| vec![ |
| Field::new("field1", DataType::Int32, true), |
| Field::new("field2", DataType::Int64, true), |
| ], |
| ), |
| UnionMode::Sparse, |
| ), |
| 2, |
| None, |
| 0, |
| vec![type_ids], |
| vec![field1.into_data(), field2.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Expected 2 buffers in array of type Union")] |
| fn test_validate_union_dense_without_offsets() { |
| let field1 = vec![Some(1), Some(2)].into_iter().collect::<Int32Array>(); |
| |
| let field2 = vec![Some(1)].into_iter().collect::<Int64Array>(); |
| |
| let type_ids = Buffer::from_slice_ref([0i8, 1i8]); |
| |
| ArrayData::try_new( |
| DataType::Union( |
| UnionFields::new( |
| vec![0, 1], |
| vec![ |
| Field::new("field1", DataType::Int32, true), |
| Field::new("field2", DataType::Int64, true), |
| ], |
| ), |
| UnionMode::Dense, |
| ), |
| 2, |
| None, |
| 0, |
| vec![type_ids], // need offsets buffer here too |
| vec![field1.into_data(), field2.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Need at least 8 bytes in buffers[1] in array of type Union")] |
| fn test_validate_union_dense_with_bad_len() { |
| let field1 = vec![Some(1), Some(2)].into_iter().collect::<Int32Array>(); |
| |
| let field2 = vec![Some(1)].into_iter().collect::<Int64Array>(); |
| |
| let type_ids = Buffer::from_slice_ref([0i8, 1i8]); |
| let offsets = Buffer::from_slice_ref([0i32]); // should have 2 offsets, but only have 1 |
| |
| ArrayData::try_new( |
| DataType::Union( |
| UnionFields::new( |
| vec![0, 1], |
| vec![ |
| Field::new("field1", DataType::Int32, true), |
| Field::new("field2", DataType::Int64, true), |
| ], |
| ), |
| UnionMode::Dense, |
| ), |
| 2, |
| None, |
| 0, |
| vec![type_ids, offsets], |
| vec![field1.into_data(), field2.into_data()], |
| ) |
| .unwrap(); |
| } |
| |
| #[test] |
| fn test_try_new_sliced_struct() { |
| let mut builder = StructBuilder::new( |
| vec![ |
| Field::new("a", DataType::Int32, true), |
| Field::new("b", DataType::Boolean, true), |
| ], |
| vec![ |
| Box::new(Int32Builder::with_capacity(5)), |
| Box::new(BooleanBuilder::with_capacity(5)), |
| ], |
| ); |
| |
| // struct[0] = { a: 10, b: true } |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_option(Some(10)); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_option(Some(true)); |
| builder.append(true); |
| |
| // struct[1] = null |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_option(None); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_option(None); |
| builder.append(false); |
| |
| // struct[2] = { a: null, b: false } |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_option(None); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_option(Some(false)); |
| builder.append(true); |
| |
| // struct[3] = { a: 21, b: null } |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_option(Some(21)); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_option(None); |
| builder.append(true); |
| |
| // struct[4] = { a: 18, b: false } |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_option(Some(18)); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_option(Some(false)); |
| builder.append(true); |
| |
| let struct_array = builder.finish(); |
| let struct_array_slice = struct_array.slice(1, 3); |
| assert_eq!(struct_array_slice, struct_array_slice); |
| } |
| |
| #[test] |
| fn test_string_data_from_foreign() { |
| let mut strings = "foobarfoobar".to_owned(); |
| let mut offsets = vec![0_i32, 0, 3, 6, 12]; |
| let mut bitmap = vec![0b1110_u8]; |
| |
| let strings_buffer = unsafe { |
| Buffer::from_custom_allocation( |
| NonNull::new_unchecked(strings.as_mut_ptr()), |
| strings.len(), |
| Arc::new(strings), |
| ) |
| }; |
| let offsets_buffer = unsafe { |
| Buffer::from_custom_allocation( |
| NonNull::new_unchecked(offsets.as_mut_ptr() as *mut u8), |
| offsets.len() * std::mem::size_of::<i32>(), |
| Arc::new(offsets), |
| ) |
| }; |
| let null_buffer = unsafe { |
| Buffer::from_custom_allocation( |
| NonNull::new_unchecked(bitmap.as_mut_ptr()), |
| bitmap.len(), |
| Arc::new(bitmap), |
| ) |
| }; |
| |
| let data = ArrayData::try_new( |
| DataType::Utf8, |
| 4, |
| Some(null_buffer), |
| 0, |
| vec![offsets_buffer, strings_buffer], |
| vec![], |
| ) |
| .unwrap(); |
| |
| let array = make_array(data); |
| let array = array.as_any().downcast_ref::<StringArray>().unwrap(); |
| |
| let expected = StringArray::from(vec![None, Some("foo"), Some("bar"), Some("foobar")]); |
| |
| assert_eq!(array, &expected); |
| } |
| |
| #[test] |
| fn test_decimal_full_validation() { |
| let array = Decimal128Array::from(vec![123456_i128]); |
| let error = array.validate_decimal_precision(5).unwrap_err(); |
| assert_eq!( |
| "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999", |
| error.to_string() |
| ); |
| } |
| |
| #[test] |
| fn test_decimal_validation() { |
| let mut builder = Decimal128Builder::with_capacity(4); |
| builder.append_value(10000); |
| builder.append_value(20000); |
| let array = builder.finish(); |
| |
| array.into_data().validate_full().unwrap(); |
| } |
| |
| #[test] |
| #[cfg(not(feature = "force_validate"))] |
| fn test_sliced_array_child() { |
| let values = Int32Array::from_iter_values([1, 2, 3]); |
| let values_sliced = values.slice(1, 2); |
| let offsets = Buffer::from_iter([1_i32, 3_i32]); |
| |
| let list_field = Field::new("element", DataType::Int32, false); |
| let data_type = DataType::List(Arc::new(list_field)); |
| |
| let data = unsafe { |
| ArrayData::new_unchecked( |
| data_type, |
| 1, |
| None, |
| None, |
| 0, |
| vec![offsets], |
| vec![values_sliced.into_data()], |
| ) |
| }; |
| |
| let err = data.validate_values().unwrap_err(); |
| assert_eq!(err.to_string(), "Invalid argument error: Offset invariant failure: offset at position 1 out of bounds: 3 > 2"); |
| } |