Fix ignored limit on `lexsort_to_indices` (#2991)
* Fix ignored limit on lexsort_to_indices
* Update comments
* Update arrow/src/compute/kernels/sort.rs
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index b297622..a10e674 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -950,7 +950,7 @@
});
Ok(UInt32Array::from_iter_values(
- value_indices.iter().map(|i| *i as u32),
+ value_indices.iter().take(len).map(|i| *i as u32),
))
}
@@ -1422,6 +1422,18 @@
}
}
+ /// Returns each array in `expected_output` sliced to `length` elements starting at `offset`.
+ fn slice_arrays(
+ expected_output: Vec<ArrayRef>,
+ offset: usize,
+ length: usize,
+ ) -> Vec<ArrayRef> {
+ expected_output
+ .into_iter()
+ .map(|array| array.slice(offset, length))
+ .collect()
+ }
+
fn test_sort_binary_arrays(
data: Vec<Option<Vec<u8>>>,
options: Option<SortOptions>,
@@ -3439,8 +3451,10 @@
Some(2),
Some(17),
])) as ArrayRef];
- test_lex_sort_arrays(input.clone(), expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
+ // Explicitly test a limit on the sort as a demonstration
let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(0),
@@ -3519,7 +3533,8 @@
Some(-2),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
// test mix of string and in64 with option
let input = vec![
@@ -3562,7 +3577,8 @@
Some("7"),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
// test sort with nulls first
let input = vec![
@@ -3605,7 +3621,8 @@
Some("world"),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
// test sort with nulls last
let input = vec![
@@ -3648,7 +3665,8 @@
None,
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
// test sort with opposite options
let input = vec![
@@ -3695,7 +3713,15 @@
Some("foo"),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(
+ input.clone(),
+ slice_arrays(expected.clone(), 0, 5),
+ Some(5),
+ );
+
+ // Limiting by more rows than present is ok
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
}
#[test]