support LargeUtf8 in sort kernel (#26)
diff --git a/.gitignore b/.gitignore
index 5b3bf6c..e8d9955 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,6 @@
rusty-tags.vi
.history
.flatbuffers/
-
+.idea/
.vscode
venv/*
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index bf8eda3..30341b6 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -257,7 +257,8 @@
values, v, n, cmp, &options, limit,
)
}
- DataType::Utf8 => sort_string(values, v, n, &options, limit),
+ DataType::Utf8 => sort_string::<i32>(values, v, n, &options, limit),
+ DataType::LargeUtf8 => sort_string::<i64>(values, v, n, &options, limit),
DataType::List(field) => match field.data_type() {
DataType::Int8 => sort_list::<i32, Int8Type>(values, v, n, &options, limit),
DataType::Int16 => sort_list::<i32, Int16Type>(values, v, n, &options, limit),
@@ -545,14 +546,17 @@
}
/// Sort strings
-fn sort_string(
+fn sort_string<Offset: StringOffsetSizeTrait>(
values: &ArrayRef,
value_indices: Vec<u32>,
null_indices: Vec<u32>,
options: &SortOptions,
limit: Option<usize>,
) -> Result<UInt32Array> {
- let values = as_string_array(values);
+ let values = values
+ .as_any()
+ .downcast_ref::<GenericStringArray<Offset>>()
+ .unwrap();
sort_string_helper(
values,
@@ -958,14 +962,25 @@
assert_eq!(output, expected)
}
+ /// Tests both Utf8 and LargeUtf8
fn test_sort_string_arrays(
data: Vec<Option<&str>>,
options: Option<SortOptions>,
limit: Option<usize>,
expected_data: Vec<Option<&str>>,
) {
- let output = StringArray::from(data);
- let expected = Arc::new(StringArray::from(expected_data)) as ArrayRef;
+ let output = StringArray::from(data.clone());
+ let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
+ let output = match limit {
+ Some(_) => {
+ sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
+ }
+ _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
+ };
+ assert_eq!(&output, &expected);
+
+ let output = LargeStringArray::from(data);
+ let expected = Arc::new(LargeStringArray::from(expected_data)) as ArrayRef;
let output = match limit {
Some(_) => {
sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()