| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| //! Defines sort kernel for `ArrayRef` |
| |
| use crate::array::*; |
| use crate::buffer::MutableBuffer; |
| use crate::compute::take; |
| use crate::datatypes::*; |
| use crate::downcast_dictionary_array; |
| use crate::error::{ArrowError, Result}; |
| use std::cmp::Ordering; |
| use std::collections::HashMap; |
| use TimeUnit::*; |
| |
| /// Sort the `ArrayRef` using `SortOptions`. |
| /// |
| /// Performs a sort on values and indices. Nulls are ordered according |
| /// to the `nulls_first` flag in `options`. Floats are sorted using |
| /// IEEE 754 totalOrder |
| /// |
| /// Returns an `ArrowError::ComputeError(String)` if the array type is |
| /// either unsupported by `sort_to_indices` or `take`. |
| /// |
| /// Note: this is an unstable_sort, meaning it may not preserve the |
| /// order of equal elements. |
| /// |
| /// # Example |
| /// ```rust |
| /// # use std::sync::Arc; |
| /// # use arrow::array::{Int32Array, ArrayRef}; |
| /// # use arrow::error::Result; |
| /// # use arrow::compute::kernels::sort::sort; |
| /// # fn main() -> Result<()> { |
| /// let array: ArrayRef = Arc::new(Int32Array::from(vec![5, 4, 3, 2, 1])); |
| /// let sorted_array = sort(&array, None).unwrap(); |
| /// let sorted_array = sorted_array.as_any().downcast_ref::<Int32Array>().unwrap(); |
| /// assert_eq!(sorted_array, &Int32Array::from(vec![1, 2, 3, 4, 5])); |
| /// # Ok(()) |
| /// # } |
| /// ``` |
| pub fn sort(values: &ArrayRef, options: Option<SortOptions>) -> Result<ArrayRef> { |
| let indices = sort_to_indices(values, options, None)?; |
| take(values.as_ref(), &indices, None) |
| } |
| |
| /// Sort the `ArrayRef` partially. |
| /// |
| /// If `limit` is specified, the resulting array will contain only |
| /// first `limit` in the sort order. Any data data after the limit |
| /// will be discarded. |
| /// |
| /// Note: this is an unstable_sort, meaning it may not preserve the |
| /// order of equal elements. |
| /// |
| /// # Example |
| /// ```rust |
| /// # use std::sync::Arc; |
| /// # use arrow::array::{Int32Array, ArrayRef}; |
| /// # use arrow::error::Result; |
| /// # use arrow::compute::kernels::sort::{sort_limit, SortOptions}; |
| /// # fn main() -> Result<()> { |
| /// let array: ArrayRef = Arc::new(Int32Array::from(vec![5, 4, 3, 2, 1])); |
| /// |
| /// // Find the the top 2 items |
| /// let sorted_array = sort_limit(&array, None, Some(2)).unwrap(); |
| /// let sorted_array = sorted_array.as_any().downcast_ref::<Int32Array>().unwrap(); |
| /// assert_eq!(sorted_array, &Int32Array::from(vec![1, 2])); |
| /// |
| /// // Find the bottom top 2 items |
| /// let options = Some(SortOptions { |
| /// descending: true, |
| /// ..Default::default() |
| /// }); |
| /// let sorted_array = sort_limit(&array, options, Some(2)).unwrap(); |
| /// let sorted_array = sorted_array.as_any().downcast_ref::<Int32Array>().unwrap(); |
| /// assert_eq!(sorted_array, &Int32Array::from(vec![5, 4])); |
| /// # Ok(()) |
| /// # } |
| /// ``` |
| pub fn sort_limit( |
| values: &ArrayRef, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| ) -> Result<ArrayRef> { |
| let indices = sort_to_indices(values, options, limit)?; |
| take(values.as_ref(), &indices, None) |
| } |
| |
| /// we can only do this if the T is primitive |
| #[inline] |
| fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F) |
| where |
| F: FnMut(&T, &T) -> Ordering, |
| { |
| if array.len() == limit { |
| array.sort_unstable_by(cmp); |
| } else { |
| partial_sort(array, limit, cmp); |
| } |
| } |
| |
| fn cmp<T>(l: T, r: T) -> std::cmp::Ordering |
| where |
| T: Ord, |
| { |
| l.cmp(&r) |
| } |
| |
| // partition indices into valid and null indices |
| fn partition_validity(array: &ArrayRef) -> (Vec<u32>, Vec<u32>) { |
| match array.null_count() { |
| // faster path |
| 0 => ((0..(array.len() as u32)).collect(), vec![]), |
| _ => { |
| let indices = 0..(array.len() as u32); |
| indices.partition(|index| array.is_valid(*index as usize)) |
| } |
| } |
| } |
| |
| /// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices. |
| /// For floating point arrays any NaN values are considered to be greater than any other non-null value |
| /// limit is an option for partial_sort |
| pub fn sort_to_indices( |
| values: &ArrayRef, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| ) -> Result<UInt32Array> { |
| let options = options.unwrap_or_default(); |
| |
| let (v, n) = partition_validity(values); |
| |
| Ok(match values.data_type() { |
| DataType::Decimal128(_, _) => { |
| sort_primitive::<Decimal128Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Decimal256(_, _) => { |
| sort_primitive::<Decimal256Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Boolean => sort_boolean(values, v, n, &options, limit), |
| DataType::Int8 => { |
| sort_primitive::<Int8Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Int16 => { |
| sort_primitive::<Int16Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Int32 => { |
| sort_primitive::<Int32Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Int64 => { |
| sort_primitive::<Int64Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::UInt8 => { |
| sort_primitive::<UInt8Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::UInt16 => { |
| sort_primitive::<UInt16Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::UInt32 => { |
| sort_primitive::<UInt32Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::UInt64 => { |
| sort_primitive::<UInt64Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Float32 => sort_primitive::<Float32Type, _>( |
| values, |
| v, |
| n, |
| |x, y| x.total_cmp(&y), |
| &options, |
| limit, |
| ), |
| DataType::Float64 => sort_primitive::<Float64Type, _>( |
| values, |
| v, |
| n, |
| |x, y| x.total_cmp(&y), |
| &options, |
| limit, |
| ), |
| DataType::Date32 => { |
| sort_primitive::<Date32Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Date64 => { |
| sort_primitive::<Date64Type, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Time32(Second) => { |
| sort_primitive::<Time32SecondType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Time32(Millisecond) => { |
| sort_primitive::<Time32MillisecondType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Time64(Microsecond) => { |
| sort_primitive::<Time64MicrosecondType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Time64(Nanosecond) => { |
| sort_primitive::<Time64NanosecondType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Timestamp(Second, _) => { |
| sort_primitive::<TimestampSecondType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Timestamp(Millisecond, _) => { |
| sort_primitive::<TimestampMillisecondType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Timestamp(Microsecond, _) => { |
| sort_primitive::<TimestampMicrosecondType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Timestamp(Nanosecond, _) => { |
| sort_primitive::<TimestampNanosecondType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Interval(IntervalUnit::YearMonth) => { |
| sort_primitive::<IntervalYearMonthType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Interval(IntervalUnit::DayTime) => { |
| sort_primitive::<IntervalDayTimeType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Interval(IntervalUnit::MonthDayNano) => { |
| sort_primitive::<IntervalMonthDayNanoType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Duration(TimeUnit::Second) => { |
| sort_primitive::<DurationSecondType, _>(values, v, n, cmp, &options, limit) |
| } |
| DataType::Duration(TimeUnit::Millisecond) => { |
| sort_primitive::<DurationMillisecondType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Duration(TimeUnit::Microsecond) => { |
| sort_primitive::<DurationMicrosecondType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Duration(TimeUnit::Nanosecond) => { |
| sort_primitive::<DurationNanosecondType, _>( |
| values, v, n, cmp, &options, limit, |
| ) |
| } |
| DataType::Utf8 => sort_string::<i32>(values, v, n, &options, limit), |
| DataType::LargeUtf8 => sort_string::<i64>(values, v, n, &options, limit), |
| DataType::List(field) | DataType::FixedSizeList(field, _) => match field |
| .data_type() |
| { |
| DataType::Int8 => sort_list::<i32, Int8Type>(values, v, n, &options, limit), |
| DataType::Int16 => sort_list::<i32, Int16Type>(values, v, n, &options, limit), |
| DataType::Int32 => sort_list::<i32, Int32Type>(values, v, n, &options, limit), |
| DataType::Int64 => sort_list::<i32, Int64Type>(values, v, n, &options, limit), |
| DataType::UInt8 => sort_list::<i32, UInt8Type>(values, v, n, &options, limit), |
| DataType::UInt16 => { |
| sort_list::<i32, UInt16Type>(values, v, n, &options, limit) |
| } |
| DataType::UInt32 => { |
| sort_list::<i32, UInt32Type>(values, v, n, &options, limit) |
| } |
| DataType::UInt64 => { |
| sort_list::<i32, UInt64Type>(values, v, n, &options, limit) |
| } |
| DataType::Float32 => { |
| sort_list::<i32, Float32Type>(values, v, n, &options, limit) |
| } |
| DataType::Float64 => { |
| sort_list::<i32, Float64Type>(values, v, n, &options, limit) |
| } |
| t => { |
| return Err(ArrowError::ComputeError(format!( |
| "Sort not supported for list type {:?}", |
| t |
| ))); |
| } |
| }, |
| DataType::LargeList(field) => match field.data_type() { |
| DataType::Int8 => sort_list::<i64, Int8Type>(values, v, n, &options, limit), |
| DataType::Int16 => sort_list::<i64, Int16Type>(values, v, n, &options, limit), |
| DataType::Int32 => sort_list::<i64, Int32Type>(values, v, n, &options, limit), |
| DataType::Int64 => sort_list::<i64, Int64Type>(values, v, n, &options, limit), |
| DataType::UInt8 => sort_list::<i64, UInt8Type>(values, v, n, &options, limit), |
| DataType::UInt16 => { |
| sort_list::<i64, UInt16Type>(values, v, n, &options, limit) |
| } |
| DataType::UInt32 => { |
| sort_list::<i64, UInt32Type>(values, v, n, &options, limit) |
| } |
| DataType::UInt64 => { |
| sort_list::<i64, UInt64Type>(values, v, n, &options, limit) |
| } |
| DataType::Float32 => { |
| sort_list::<i64, Float32Type>(values, v, n, &options, limit) |
| } |
| DataType::Float64 => { |
| sort_list::<i64, Float64Type>(values, v, n, &options, limit) |
| } |
| t => { |
| return Err(ArrowError::ComputeError(format!( |
| "Sort not supported for list type {:?}", |
| t |
| ))); |
| } |
| }, |
| DataType::Dictionary(_, _) => { |
| let value_null_first = if options.descending { |
| // When sorting dictionary in descending order, we take inverse of of null ordering |
| // when sorting the values. Because if `nulls_first` is true, null must be in front |
| // of non-null value. As we take the sorted order of value array to sort dictionary |
| // keys, these null values will be treated as smallest ones and be sorted to the end |
| // of sorted result. So we set `nulls_first` to false when sorting dictionary value |
| // array to make them as largest ones, then null values will be put at the beginning |
| // of sorted dictionary result. |
| !options.nulls_first |
| } else { |
| options.nulls_first |
| }; |
| let value_options = Some(SortOptions { |
| descending: false, |
| nulls_first: value_null_first, |
| }); |
| downcast_dictionary_array!( |
| values => match values.values().data_type() { |
| dt if DataType::is_primitive(dt) => { |
| let dict_values = values.values(); |
| let sorted_value_indices = sort_to_indices(dict_values, value_options, None)?; |
| let value_indices_map = prepare_indices_map(&sorted_value_indices); |
| sort_primitive_dictionary::<_, _>(values, &value_indices_map, v, n, options, limit, cmp) |
| }, |
| DataType::Utf8 => { |
| let dict_values = values.values(); |
| let sorted_value_indices = sort_to_indices(dict_values, value_options, None)?; |
| let value_indices_map = prepare_indices_map(&sorted_value_indices); |
| sort_string_dictionary::<_>(values, &value_indices_map, v, n, &options, limit) |
| }, |
| t => return Err(ArrowError::ComputeError(format!( |
| "Unsupported dictionary value type {}", t |
| ))), |
| }, |
| t => return Err(ArrowError::ComputeError(format!( |
| "Unsupported datatype {}", t |
| ))), |
| ) |
| } |
| DataType::Binary | DataType::FixedSizeBinary(_) => { |
| sort_binary::<i32>(values, v, n, &options, limit) |
| } |
| DataType::LargeBinary => sort_binary::<i64>(values, v, n, &options, limit), |
| t => { |
| return Err(ArrowError::ComputeError(format!( |
| "Sort not supported for data type {:?}", |
| t |
| ))); |
| } |
| }) |
| } |
| |
| /// Options that define how sort kernels should behave |
| #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
| pub struct SortOptions { |
| /// Whether to sort in descending order |
| pub descending: bool, |
| /// Whether to sort nulls first |
| pub nulls_first: bool, |
| } |
| |
| impl Default for SortOptions { |
| fn default() -> Self { |
| Self { |
| descending: false, |
| // default to nulls first to match spark's behavior |
| nulls_first: true, |
| } |
| } |
| } |
| |
| /// Sort boolean values |
| /// |
| /// when a limit is present, the sort is pair-comparison based as k-select might be more efficient, |
| /// when the limit is absent, binary partition is used to speed up (which is linear). |
| /// |
| /// TODO maybe partition_validity call can be eliminated in this case |
| /// and [tri-color sort](https://en.wikipedia.org/wiki/Dutch_national_flag_problem) |
| /// can be used instead. |
| fn sort_boolean( |
| values: &ArrayRef, |
| value_indices: Vec<u32>, |
| mut null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array { |
| let values = values |
| .as_any() |
| .downcast_ref::<BooleanArray>() |
| .expect("Unable to downcast to boolean array"); |
| let descending = options.descending; |
| |
| let valids_len = value_indices.len(); |
| let nulls_len = null_indices.len(); |
| |
| let mut len = values.len(); |
| let valids = if let Some(limit) = limit { |
| len = limit.min(len); |
| // create tuples that are used for sorting |
| let mut valids = value_indices |
| .into_iter() |
| .map(|index| (index, values.value(index as usize))) |
| .collect::<Vec<(u32, bool)>>(); |
| |
| sort_valids(descending, &mut valids, &mut null_indices, len, cmp); |
| valids |
| } else { |
| // when limit is not present, we have a better way than sorting: we can just partition |
| // the vec into [false..., true...] or [true..., false...] when descending |
| // TODO when https://github.com/rust-lang/rust/issues/62543 is merged we can use partition_in_place |
| let (mut a, b): (Vec<_>, Vec<_>) = value_indices |
| .into_iter() |
| .map(|index| (index, values.value(index as usize))) |
| .partition(|(_, value)| *value == descending); |
| a.extend(b); |
| if descending { |
| null_indices.reverse(); |
| } |
| a |
| }; |
| |
| let nulls = null_indices; |
| |
| // collect results directly into a buffer instead of a vec to avoid another aligned allocation |
| let result_capacity = len * std::mem::size_of::<u32>(); |
| let mut result = MutableBuffer::new(result_capacity); |
| // sets len to capacity so we can access the whole buffer as a typed slice |
| result.resize(result_capacity, 0); |
| let result_slice: &mut [u32] = result.typed_data_mut(); |
| |
| if options.nulls_first { |
| let size = nulls_len.min(len); |
| result_slice[0..size].copy_from_slice(&nulls[0..size]); |
| if nulls_len < len { |
| insert_valid_values(result_slice, nulls_len, &valids[0..len - size]); |
| } |
| } else { |
| // nulls last |
| let size = valids.len().min(len); |
| insert_valid_values(result_slice, 0, &valids[0..size]); |
| if len > size { |
| result_slice[valids_len..].copy_from_slice(&nulls[0..(len - valids_len)]); |
| } |
| } |
| |
| let result_data = unsafe { |
| ArrayData::new_unchecked( |
| DataType::UInt32, |
| len, |
| Some(0), |
| None, |
| 0, |
| vec![result.into()], |
| vec![], |
| ) |
| }; |
| |
| UInt32Array::from(result_data) |
| } |
| |
| /// Sort primitive values |
| fn sort_primitive<T, F>( |
| values: &ArrayRef, |
| value_indices: Vec<u32>, |
| null_indices: Vec<u32>, |
| cmp: F, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array |
| where |
| T: ArrowPrimitiveType, |
| T::Native: std::cmp::PartialOrd, |
| F: Fn(T::Native, T::Native) -> std::cmp::Ordering, |
| { |
| // create tuples that are used for sorting |
| let valids = { |
| let values = as_primitive_array::<T>(values); |
| value_indices |
| .into_iter() |
| .map(|index| (index, values.value(index as usize))) |
| .collect::<Vec<(u32, T::Native)>>() |
| }; |
| sort_primitive_inner(values.len(), null_indices, cmp, options, limit, valids) |
| } |
| |
| /// A helper function used to convert sorted value indices to a map that we can look up sorted order |
| /// for a value index later. |
| fn prepare_indices_map(sorted_value_indices: &UInt32Array) -> HashMap<usize, u32> { |
| sorted_value_indices |
| .into_iter() |
| .enumerate() |
| .map(|(idx, index)| { |
| // Indices don't have None value |
| let index = index.unwrap(); |
| (index as usize, idx as u32) |
| }) |
| .collect::<HashMap<usize, u32>>() |
| } |
| |
| /// Sort dictionary encoded primitive values |
| fn sort_primitive_dictionary<K, F>( |
| values: &DictionaryArray<K>, |
| value_indices_map: &HashMap<usize, u32>, |
| value_indices: Vec<u32>, |
| null_indices: Vec<u32>, |
| options: SortOptions, |
| limit: Option<usize>, |
| cmp: F, |
| ) -> UInt32Array |
| where |
| K: ArrowDictionaryKeyType, |
| F: Fn(u32, u32) -> std::cmp::Ordering, |
| { |
| let keys: &PrimitiveArray<K> = values.keys(); |
| |
| // create tuples that are used for sorting |
| let valids = value_indices |
| .into_iter() |
| .map(|index| { |
| let key: K::Native = keys.value(index as usize); |
| let value_order = value_indices_map.get(&key.as_usize()).unwrap(); |
| (index, *value_order) |
| }) |
| .collect::<Vec<(u32, u32)>>(); |
| |
| sort_primitive_inner::<_, _>(keys.len(), null_indices, cmp, &options, limit, valids) |
| } |
| |
| // sort is instantiated a lot so we only compile this inner version for each native type |
| fn sort_primitive_inner<T, F>( |
| value_len: usize, |
| null_indices: Vec<u32>, |
| cmp: F, |
| options: &SortOptions, |
| limit: Option<usize>, |
| mut valids: Vec<(u32, T)>, |
| ) -> UInt32Array |
| where |
| T: ArrowNativeType, |
| T: std::cmp::PartialOrd, |
| F: Fn(T, T) -> std::cmp::Ordering, |
| { |
| let mut nulls = null_indices; |
| |
| let valids_len = valids.len(); |
| let nulls_len = nulls.len(); |
| let mut len = value_len; |
| |
| if let Some(limit) = limit { |
| len = limit.min(len); |
| } |
| |
| sort_valids(options.descending, &mut valids, &mut nulls, len, cmp); |
| |
| // collect results directly into a buffer instead of a vec to avoid another aligned allocation |
| let result_capacity = len * std::mem::size_of::<u32>(); |
| let mut result = MutableBuffer::new(result_capacity); |
| // sets len to capacity so we can access the whole buffer as a typed slice |
| result.resize(result_capacity, 0); |
| let result_slice: &mut [u32] = result.typed_data_mut(); |
| |
| if options.nulls_first { |
| let size = nulls_len.min(len); |
| result_slice[0..size].copy_from_slice(&nulls[0..size]); |
| if nulls_len < len { |
| insert_valid_values(result_slice, nulls_len, &valids[0..len - size]); |
| } |
| } else { |
| // nulls last |
| let size = valids.len().min(len); |
| insert_valid_values(result_slice, 0, &valids[0..size]); |
| if len > size { |
| result_slice[valids_len..].copy_from_slice(&nulls[0..(len - valids_len)]); |
| } |
| } |
| |
| let result_data = unsafe { |
| ArrayData::new_unchecked( |
| DataType::UInt32, |
| len, |
| Some(0), |
| None, |
| 0, |
| vec![result.into()], |
| vec![], |
| ) |
| }; |
| |
| UInt32Array::from(result_data) |
| } |
| |
| // insert valid and nan values in the correct order depending on the descending flag |
| fn insert_valid_values<T>(result_slice: &mut [u32], offset: usize, valids: &[(u32, T)]) { |
| let valids_len = valids.len(); |
| // helper to append the index part of the valid tuples |
| let append_valids = move |dst_slice: &mut [u32]| { |
| debug_assert_eq!(dst_slice.len(), valids_len); |
| dst_slice |
| .iter_mut() |
| .zip(valids.iter()) |
| .for_each(|(dst, src)| *dst = src.0) |
| }; |
| |
| append_valids(&mut result_slice[offset..offset + valids.len()]); |
| } |
| |
| /// Sort strings |
| fn sort_string<Offset: OffsetSizeTrait>( |
| values: &ArrayRef, |
| value_indices: Vec<u32>, |
| null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array { |
| let values = values |
| .as_any() |
| .downcast_ref::<GenericStringArray<Offset>>() |
| .unwrap(); |
| |
| sort_string_helper( |
| values, |
| value_indices, |
| null_indices, |
| options, |
| limit, |
| |array, idx| array.value(idx as usize), |
| ) |
| } |
| |
| /// Sort dictionary encoded strings |
| fn sort_string_dictionary<T: ArrowDictionaryKeyType>( |
| values: &DictionaryArray<T>, |
| value_indices_map: &HashMap<usize, u32>, |
| value_indices: Vec<u32>, |
| null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array { |
| let keys: &PrimitiveArray<T> = values.keys(); |
| |
| // create tuples that are used for sorting |
| let valids = value_indices |
| .into_iter() |
| .map(|index| { |
| let key: T::Native = keys.value(index as usize); |
| let value_order = value_indices_map.get(&key.as_usize()).unwrap(); |
| (index, *value_order) |
| }) |
| .collect::<Vec<(u32, u32)>>(); |
| |
| sort_primitive_inner::<_, _>(keys.len(), null_indices, cmp, options, limit, valids) |
| } |
| |
| /// shared implementation between dictionary encoded and plain string arrays |
| #[inline] |
| fn sort_string_helper<'a, A: Array, F>( |
| values: &'a A, |
| value_indices: Vec<u32>, |
| null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| value_fn: F, |
| ) -> UInt32Array |
| where |
| F: Fn(&'a A, u32) -> &str, |
| { |
| let mut valids = value_indices |
| .into_iter() |
| .map(|index| (index, value_fn(values, index))) |
| .collect::<Vec<(u32, &str)>>(); |
| let mut nulls = null_indices; |
| let descending = options.descending; |
| let mut len = values.len(); |
| |
| if let Some(limit) = limit { |
| len = limit.min(len); |
| } |
| |
| sort_valids(descending, &mut valids, &mut nulls, len, cmp); |
| // collect the order of valid tuplies |
| let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect(); |
| |
| if options.nulls_first { |
| nulls.append(&mut valid_indices); |
| nulls.truncate(len); |
| UInt32Array::from(nulls) |
| } else { |
| // no need to sort nulls as they are in the correct order already |
| valid_indices.append(&mut nulls); |
| valid_indices.truncate(len); |
| UInt32Array::from(valid_indices) |
| } |
| } |
| |
| fn sort_list<S, T>( |
| values: &ArrayRef, |
| value_indices: Vec<u32>, |
| null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array |
| where |
| S: OffsetSizeTrait, |
| T: ArrowPrimitiveType, |
| T::Native: std::cmp::PartialOrd, |
| { |
| sort_list_inner::<S>(values, value_indices, null_indices, options, limit) |
| } |
| |
| fn sort_list_inner<S>( |
| values: &ArrayRef, |
| value_indices: Vec<u32>, |
| mut null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array |
| where |
| S: OffsetSizeTrait, |
| { |
| let mut valids: Vec<(u32, ArrayRef)> = values |
| .as_any() |
| .downcast_ref::<FixedSizeListArray>() |
| .map_or_else( |
| || { |
| let values = as_generic_list_array::<S>(values); |
| value_indices |
| .iter() |
| .copied() |
| .map(|index| (index, values.value(index as usize))) |
| .collect() |
| }, |
| |values| { |
| value_indices |
| .iter() |
| .copied() |
| .map(|index| (index, values.value(index as usize))) |
| .collect() |
| }, |
| ); |
| |
| let mut len = values.len(); |
| let descending = options.descending; |
| |
| if let Some(limit) = limit { |
| len = limit.min(len); |
| } |
| sort_valids_array(descending, &mut valids, &mut null_indices, len); |
| |
| let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect(); |
| if options.nulls_first { |
| null_indices.append(&mut valid_indices); |
| null_indices.truncate(len); |
| UInt32Array::from(null_indices) |
| } else { |
| valid_indices.append(&mut null_indices); |
| valid_indices.truncate(len); |
| UInt32Array::from(valid_indices) |
| } |
| } |
| |
| fn sort_binary<S>( |
| values: &ArrayRef, |
| value_indices: Vec<u32>, |
| mut null_indices: Vec<u32>, |
| options: &SortOptions, |
| limit: Option<usize>, |
| ) -> UInt32Array |
| where |
| S: OffsetSizeTrait, |
| { |
| let mut valids: Vec<(u32, &[u8])> = values |
| .as_any() |
| .downcast_ref::<FixedSizeBinaryArray>() |
| .map_or_else( |
| || { |
| let values = as_generic_binary_array::<S>(values); |
| value_indices |
| .iter() |
| .copied() |
| .map(|index| (index, values.value(index as usize))) |
| .collect() |
| }, |
| |values| { |
| value_indices |
| .iter() |
| .copied() |
| .map(|index| (index, values.value(index as usize))) |
| .collect() |
| }, |
| ); |
| |
| let mut len = values.len(); |
| let descending = options.descending; |
| |
| if let Some(limit) = limit { |
| len = limit.min(len); |
| } |
| |
| sort_valids(descending, &mut valids, &mut null_indices, len, cmp); |
| |
| let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect(); |
| if options.nulls_first { |
| null_indices.append(&mut valid_indices); |
| null_indices.truncate(len); |
| UInt32Array::from(null_indices) |
| } else { |
| valid_indices.append(&mut null_indices); |
| valid_indices.truncate(len); |
| UInt32Array::from(valid_indices) |
| } |
| } |
| |
| /// Compare two `Array`s based on the ordering defined in [build_compare] |
| fn cmp_array(a: &dyn Array, b: &dyn Array) -> Ordering { |
| let cmp_op = build_compare(a, b).unwrap(); |
| let length = a.len().max(b.len()); |
| |
| for i in 0..length { |
| let result = cmp_op(i, i); |
| if result != Ordering::Equal { |
| return result; |
| } |
| } |
| Ordering::Equal |
| } |
| |
| /// One column to be used in lexicographical sort |
| #[derive(Clone, Debug)] |
| pub struct SortColumn { |
| pub values: ArrayRef, |
| pub options: Option<SortOptions>, |
| } |
| |
| /// Sort a list of `ArrayRef` using `SortOptions` provided for each array. |
| /// |
| /// Performs a stable lexicographical sort on values and indices. |
| /// |
| /// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by |
| /// `lexsort_to_indices` or `take`. |
| /// |
| /// Example: |
| /// |
| /// ``` |
| /// use std::convert::From; |
| /// use std::sync::Arc; |
| /// use arrow::array::{ArrayRef, StringArray, PrimitiveArray, as_primitive_array}; |
| /// use arrow::compute::kernels::sort::{SortColumn, SortOptions, lexsort}; |
| /// use arrow::datatypes::Int64Type; |
| /// |
| /// let sorted_columns = lexsort(&vec![ |
| /// SortColumn { |
| /// values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| /// None, |
| /// Some(-2), |
| /// Some(89), |
| /// Some(-64), |
| /// Some(101), |
| /// ])) as ArrayRef, |
| /// options: None, |
| /// }, |
| /// SortColumn { |
| /// values: Arc::new(StringArray::from(vec![ |
| /// Some("hello"), |
| /// Some("world"), |
| /// Some(","), |
| /// Some("foobar"), |
| /// Some("!"), |
| /// ])) as ArrayRef, |
| /// options: Some(SortOptions { |
| /// descending: true, |
| /// nulls_first: false, |
| /// }), |
| /// }, |
| /// ], None).unwrap(); |
| /// |
| /// assert_eq!(as_primitive_array::<Int64Type>(&sorted_columns[0]).value(1), -64); |
| /// assert!(sorted_columns[0].is_null(0)); |
| /// ``` |
| /// |
| /// Note: for multi-column sorts without a limit, using the [row format][crate::row] |
| /// may be significantly faster |
| /// |
| pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>> { |
| let indices = lexsort_to_indices(columns, limit)?; |
| columns |
| .iter() |
| .map(|c| take(c.values.as_ref(), &indices, None)) |
| .collect() |
| } |
| |
| /// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer |
| /// (`UInt32Array`) of indices. |
| /// |
| /// Note: for multi-column sorts without a limit, using the [row format][crate::row] |
| /// may be significantly faster |
| pub fn lexsort_to_indices( |
| columns: &[SortColumn], |
| limit: Option<usize>, |
| ) -> Result<UInt32Array> { |
| if columns.is_empty() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Sort requires at least one column".to_string(), |
| )); |
| } |
| if columns.len() == 1 { |
| // fallback to non-lexical sort |
| let column = &columns[0]; |
| return sort_to_indices(&column.values, column.options, limit); |
| } |
| |
| let row_count = columns[0].values.len(); |
| if columns.iter().any(|item| item.values.len() != row_count) { |
| return Err(ArrowError::ComputeError( |
| "lexical sort columns have different row counts".to_string(), |
| )); |
| }; |
| |
| let mut value_indices = (0..row_count).collect::<Vec<usize>>(); |
| let mut len = value_indices.len(); |
| |
| if let Some(limit) = limit { |
| len = limit.min(len); |
| } |
| |
| let lexicographical_comparator = LexicographicalComparator::try_new(columns)?; |
| // uint32 can be sorted unstably |
| sort_unstable_by(&mut value_indices, len, |a, b| { |
| lexicographical_comparator.compare(a, b) |
| }); |
| |
| Ok(UInt32Array::from_iter_values( |
| value_indices.iter().take(len).map(|i| *i as u32), |
| )) |
| } |
| |
| /// It's unstable_sort, may not preserve the order of equal elements |
| pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F) |
| where |
| F: FnMut(&T, &T) -> Ordering, |
| { |
| let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less); |
| before.sort_unstable_by(is_less); |
| } |
| |
| type LexicographicalCompareItem<'a> = ( |
| &'a ArrayData, // data |
| DynComparator, // comparator |
| SortOptions, // sort_option |
| ); |
| |
| /// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data |
| /// at given two indices. The lifetime is the same at the data wrapped. |
| pub(crate) struct LexicographicalComparator<'a> { |
| compare_items: Vec<LexicographicalCompareItem<'a>>, |
| } |
| |
| impl LexicographicalComparator<'_> { |
| /// lexicographically compare values at the wrapped columns with given indices. |
| pub(crate) fn compare<'a, 'b>( |
| &'a self, |
| a_idx: &'b usize, |
| b_idx: &'b usize, |
| ) -> Ordering { |
| for (data, comparator, sort_option) in &self.compare_items { |
| match (data.is_valid(*a_idx), data.is_valid(*b_idx)) { |
| (true, true) => { |
| match (comparator)(*a_idx, *b_idx) { |
| // equal, move on to next column |
| Ordering::Equal => continue, |
| order => { |
| if sort_option.descending { |
| return order.reverse(); |
| } else { |
| return order; |
| } |
| } |
| } |
| } |
| (false, true) => { |
| return if sort_option.nulls_first { |
| Ordering::Less |
| } else { |
| Ordering::Greater |
| }; |
| } |
| (true, false) => { |
| return if sort_option.nulls_first { |
| Ordering::Greater |
| } else { |
| Ordering::Less |
| }; |
| } |
| // equal, move on to next column |
| (false, false) => continue, |
| } |
| } |
| |
| Ordering::Equal |
| } |
| |
| /// Create a new lex comparator that will wrap the given sort columns and give comparison |
| /// results with two indices. |
| pub(crate) fn try_new( |
| columns: &[SortColumn], |
| ) -> Result<LexicographicalComparator<'_>> { |
| let compare_items = columns |
| .iter() |
| .map(|column| { |
| // flatten and convert build comparators |
| // use ArrayData for is_valid checks later to avoid dynamic call |
| let values = column.values.as_ref(); |
| let data = values.data_ref(); |
| Ok(( |
| data, |
| build_compare(values, values)?, |
| column.options.unwrap_or_default(), |
| )) |
| }) |
| .collect::<Result<Vec<_>>>()?; |
| Ok(LexicographicalComparator { compare_items }) |
| } |
| } |
| |
| fn sort_valids<T, U>( |
| descending: bool, |
| valids: &mut [(u32, T)], |
| nulls: &mut [U], |
| len: usize, |
| mut cmp: impl FnMut(T, T) -> Ordering, |
| ) where |
| T: ?Sized + Copy, |
| { |
| let valids_len = valids.len(); |
| if !descending { |
| sort_unstable_by(valids, len.min(valids_len), |a, b| cmp(a.1, b.1)); |
| } else { |
| sort_unstable_by(valids, len.min(valids_len), |a, b| cmp(a.1, b.1).reverse()); |
| // reverse to keep a stable ordering |
| nulls.reverse(); |
| } |
| } |
| |
| fn sort_valids_array<T>( |
| descending: bool, |
| valids: &mut [(u32, ArrayRef)], |
| nulls: &mut [T], |
| len: usize, |
| ) { |
| let valids_len = valids.len(); |
| if !descending { |
| sort_unstable_by(valids, len.min(valids_len), |a, b| { |
| cmp_array(a.1.as_ref(), b.1.as_ref()) |
| }); |
| } else { |
| sort_unstable_by(valids, len.min(valids_len), |a, b| { |
| cmp_array(a.1.as_ref(), b.1.as_ref()).reverse() |
| }); |
| // reverse to keep a stable ordering |
| nulls.reverse(); |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use arrow_buffer::i256; |
| use rand::rngs::StdRng; |
| use rand::{Rng, RngCore, SeedableRng}; |
| use std::convert::TryFrom; |
| use std::sync::Arc; |
| |
| fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array { |
| data.into_iter() |
| .collect::<Decimal128Array>() |
| .with_precision_and_scale(23, 6) |
| .unwrap() |
| } |
| |
| fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array { |
| data.into_iter() |
| .collect::<Decimal256Array>() |
| .with_precision_and_scale(53, 6) |
| .unwrap() |
| } |
| |
| fn test_sort_to_indices_decimal128_array( |
| data: Vec<Option<i128>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<u32>, |
| ) { |
| let output = create_decimal128_array(data); |
| let expected = UInt32Array::from(expected_data); |
| let output = |
| sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap(); |
| assert_eq!(output, expected) |
| } |
| |
| fn test_sort_to_indices_decimal256_array( |
| data: Vec<Option<i256>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<u32>, |
| ) { |
| let output = create_decimal256_array(data); |
| let expected = UInt32Array::from(expected_data); |
| let output = |
| sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap(); |
| assert_eq!(output, expected) |
| } |
| |
| fn test_sort_decimal128_array( |
| data: Vec<Option<i128>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<i128>>, |
| ) { |
| let output = create_decimal128_array(data); |
| let expected = Arc::new(create_decimal128_array(expected_data)) as ArrayRef; |
| let output = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(), |
| }; |
| assert_eq!(&output, &expected) |
| } |
| |
| fn test_sort_decimal256_array( |
| data: Vec<Option<i256>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<i256>>, |
| ) { |
| let output = create_decimal256_array(data); |
| let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef; |
| let output = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(), |
| }; |
| assert_eq!(&output, &expected) |
| } |
| |
| fn test_sort_to_indices_boolean_arrays( |
| data: Vec<Option<bool>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<u32>, |
| ) { |
| let output = BooleanArray::from(data); |
| let expected = UInt32Array::from(expected_data); |
| let output = |
| sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap(); |
| assert_eq!(output, expected) |
| } |
| |
| fn test_sort_to_indices_primitive_arrays<T>( |
| data: Vec<Option<T::Native>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<u32>, |
| ) where |
| T: ArrowPrimitiveType, |
| PrimitiveArray<T>: From<Vec<Option<T::Native>>>, |
| { |
| let output = PrimitiveArray::<T>::from(data); |
| let expected = UInt32Array::from(expected_data); |
| let output = |
| sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap(); |
| assert_eq!(output, expected) |
| } |
| |
| fn test_sort_primitive_arrays<T>( |
| data: Vec<Option<T::Native>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<T::Native>>, |
| ) where |
| T: ArrowPrimitiveType, |
| PrimitiveArray<T>: From<Vec<Option<T::Native>>>, |
| { |
| let output = PrimitiveArray::<T>::from(data); |
| let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef; |
| let output = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(), |
| }; |
| assert_eq!(&output, &expected) |
| } |
| |
| fn test_sort_to_indices_string_arrays( |
| data: Vec<Option<&str>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<u32>, |
| ) { |
| let output = StringArray::from(data); |
| let expected = UInt32Array::from(expected_data); |
| let output = |
| sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap(); |
| assert_eq!(output, expected) |
| } |
| |
| /// Tests both Utf8 and LargeUtf8 |
| fn test_sort_string_arrays( |
| data: Vec<Option<&str>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<&str>>, |
| ) { |
| let output = StringArray::from(data.clone()); |
| let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef; |
| let output = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(), |
| }; |
| assert_eq!(&output, &expected); |
| |
| let output = LargeStringArray::from(data); |
| let expected = Arc::new(LargeStringArray::from(expected_data)) as ArrayRef; |
| let output = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(), |
| }; |
| assert_eq!(&output, &expected) |
| } |
| |
| fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>( |
| data: Vec<Option<&str>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<&str>>, |
| ) { |
| let array = data.into_iter().collect::<DictionaryArray<T>>(); |
| let array_values = array.values().clone(); |
| let dict = array_values |
| .as_any() |
| .downcast_ref::<StringArray>() |
| .expect("Unable to get dictionary values"); |
| |
| let sorted = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(), |
| }; |
| let sorted = sorted |
| .as_any() |
| .downcast_ref::<DictionaryArray<T>>() |
| .unwrap(); |
| let sorted_values = sorted.values(); |
| let sorted_dict = sorted_values |
| .as_any() |
| .downcast_ref::<StringArray>() |
| .expect("Unable to get dictionary values"); |
| let sorted_keys = sorted.keys(); |
| |
| assert_eq!(sorted_dict, dict); |
| |
| let sorted_strings = StringArray::try_from( |
| (0..sorted.len()) |
| .map(|i| { |
| if sorted.is_valid(i) { |
| Some(sorted_dict.value(sorted_keys.value(i).as_usize())) |
| } else { |
| None |
| } |
| }) |
| .collect::<Vec<Option<&str>>>(), |
| ) |
| .expect("Unable to create string array from dictionary"); |
| let expected = |
| StringArray::try_from(expected_data).expect("Unable to create string array"); |
| |
| assert_eq!(sorted_strings, expected) |
| } |
| |
| fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>( |
| keys: PrimitiveArray<K>, |
| values: PrimitiveArray<T>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<T::Native>>, |
| ) where |
| PrimitiveArray<T>: From<Vec<Option<T::Native>>>, |
| { |
| let array = DictionaryArray::<K>::try_new(&keys, &values).unwrap(); |
| let array_values = array.values().clone(); |
| let dict = array_values |
| .as_any() |
| .downcast_ref::<PrimitiveArray<T>>() |
| .expect("Unable to get dictionary values"); |
| |
| let sorted = match limit { |
| Some(_) => { |
| sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap() |
| } |
| _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(), |
| }; |
| let sorted = sorted |
| .as_any() |
| .downcast_ref::<DictionaryArray<K>>() |
| .unwrap(); |
| let sorted_values = sorted.values(); |
| let sorted_dict = sorted_values |
| .as_any() |
| .downcast_ref::<PrimitiveArray<T>>() |
| .expect("Unable to get dictionary values"); |
| let sorted_keys = sorted.keys(); |
| |
| assert_eq!(sorted_dict, dict); |
| |
| let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from( |
| (0..sorted.len()) |
| .map(|i| { |
| let key = sorted_keys.value(i).as_usize(); |
| if sorted.is_valid(i) && sorted_dict.is_valid(key) { |
| Some(sorted_dict.value(key)) |
| } else { |
| None |
| } |
| }) |
| .collect::<Vec<Option<T::Native>>>(), |
| ); |
| let expected: PrimitiveArray<T> = |
| From::<Vec<Option<T::Native>>>::from(expected_data); |
| |
| assert_eq!(sorted_values, expected) |
| } |
| |
| fn test_sort_list_arrays<T>( |
| data: Vec<Option<Vec<Option<T::Native>>>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<Vec<Option<T::Native>>>>, |
| fixed_length: Option<i32>, |
| ) where |
| T: ArrowPrimitiveType, |
| PrimitiveArray<T>: From<Vec<Option<T::Native>>>, |
| { |
| // for FixedSizedList |
| if let Some(length) = fixed_length { |
| let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>( |
| data.clone(), |
| length, |
| )); |
| let sorted = match limit { |
| Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), |
| _ => sort(&(input as ArrayRef), options).unwrap(), |
| }; |
| let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>( |
| expected_data.clone(), |
| length, |
| )) as ArrayRef; |
| |
| assert_eq!(&sorted, &expected); |
| } |
| |
| // for List |
| let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone())); |
| let sorted = match limit { |
| Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), |
| _ => sort(&(input as ArrayRef), options).unwrap(), |
| }; |
| let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>( |
| expected_data.clone(), |
| )) as ArrayRef; |
| |
| assert_eq!(&sorted, &expected); |
| |
| // for LargeList |
| let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data)); |
| let sorted = match limit { |
| Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), |
| _ => sort(&(input as ArrayRef), options).unwrap(), |
| }; |
| let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>( |
| expected_data, |
| )) as ArrayRef; |
| |
| assert_eq!(&sorted, &expected); |
| } |
| |
| fn test_lex_sort_arrays( |
| input: Vec<SortColumn>, |
| expected_output: Vec<ArrayRef>, |
| limit: Option<usize>, |
| ) { |
| let sorted = lexsort(&input, limit).unwrap(); |
| |
| for (result, expected) in sorted.iter().zip(expected_output.iter()) { |
| assert_eq!(result, expected); |
| } |
| } |
| |
| /// slice all arrays in expected_output to offset/length |
| fn slice_arrays( |
| expected_output: Vec<ArrayRef>, |
| offset: usize, |
| length: usize, |
| ) -> Vec<ArrayRef> { |
| expected_output |
| .into_iter() |
| .map(|array| array.slice(offset, length)) |
| .collect() |
| } |
| |
| fn test_sort_binary_arrays( |
| data: Vec<Option<Vec<u8>>>, |
| options: Option<SortOptions>, |
| limit: Option<usize>, |
| expected_data: Vec<Option<Vec<u8>>>, |
| fixed_length: Option<i32>, |
| ) { |
| // Fixed size binary array |
| if fixed_length.is_some() { |
| let input = Arc::new( |
| FixedSizeBinaryArray::try_from_sparse_iter(data.iter().cloned()).unwrap(), |
| ); |
| let sorted = match limit { |
| Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), |
| None => sort(&(input as ArrayRef), options).unwrap(), |
| }; |
| let expected = Arc::new( |
| FixedSizeBinaryArray::try_from_sparse_iter(expected_data.iter().cloned()) |
| .unwrap(), |
| ) as ArrayRef; |
| |
| assert_eq!(&sorted, &expected); |
| } |
| |
| // Generic size binary array |
| fn make_generic_binary_array<S: OffsetSizeTrait>( |
| data: &[Option<Vec<u8>>], |
| ) -> Arc<GenericBinaryArray<S>> { |
| Arc::new(GenericBinaryArray::<S>::from_opt_vec( |
| data.iter() |
| .map(|binary| binary.as_ref().map(Vec::as_slice)) |
| .collect(), |
| )) |
| } |
| |
| // BinaryArray |
| let input = make_generic_binary_array::<i32>(&data); |
| let sorted = match limit { |
| Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), |
| None => sort(&(input as ArrayRef), options).unwrap(), |
| }; |
| let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef; |
| assert_eq!(&sorted, &expected); |
| |
| // LargeBinaryArray |
| let input = make_generic_binary_array::<i64>(&data); |
| let sorted = match limit { |
| Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), |
| None => sort(&(input as ArrayRef), options).unwrap(), |
| }; |
| let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef; |
| assert_eq!(&sorted, &expected); |
| } |
| |
| #[test] |
| fn test_sort_to_indices_primitives() { |
| test_sort_to_indices_primitive_arrays::<Int8Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| None, |
| None, |
| vec![0, 5, 3, 1, 4, 2], |
| ); |
| test_sort_to_indices_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| None, |
| None, |
| vec![0, 5, 3, 1, 4, 2], |
| ); |
| test_sort_to_indices_primitive_arrays::<Int32Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| None, |
| None, |
| vec![0, 5, 3, 1, 4, 2], |
| ); |
| test_sort_to_indices_primitive_arrays::<Int64Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| None, |
| None, |
| vec![0, 5, 3, 1, 4, 2], |
| ); |
| test_sort_to_indices_primitive_arrays::<Float32Type>( |
| vec![ |
| None, |
| Some(-0.05), |
| Some(2.225), |
| Some(-1.01), |
| Some(-0.05), |
| None, |
| ], |
| None, |
| None, |
| vec![0, 5, 3, 1, 4, 2], |
| ); |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![ |
| None, |
| Some(-0.05), |
| Some(2.225), |
| Some(-1.01), |
| Some(-0.05), |
| None, |
| ], |
| None, |
| None, |
| vec![0, 5, 3, 1, 4, 2], |
| ); |
| |
| // descending |
| test_sort_to_indices_primitive_arrays::<Int8Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 1, 4, 3, 5, 0], // [2, 4, 1, 3, 5, 0] |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 1, 4, 3, 5, 0], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Int32Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 1, 4, 3, 5, 0], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Int64Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 1, 4, 3, 5, 0], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Float32Type>( |
| vec![ |
| None, |
| Some(0.005), |
| Some(20.22), |
| Some(-10.3), |
| Some(0.005), |
| None, |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 1, 4, 3, 5, 0], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 1, 4, 3, 5, 0], |
| ); |
| |
| // descending, nulls first |
| test_sort_to_indices_primitive_arrays::<Int8Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3] |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3] |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Int32Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 1, 4, 3], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Int64Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 1, 4, 3], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Float32Type>( |
| vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 1, 4, 3], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 1, 4, 3], |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), None, None, Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![3, 0, 1], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), None, None, Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![1, 2, 3], |
| ); |
| |
| // more nulls than limit |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![Some(1.0), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![1, 2], |
| ); |
| |
| test_sort_to_indices_primitive_arrays::<Float64Type>( |
| vec![Some(1.0), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![0, 1], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_to_indices_primitive_more_nulls_than_limit() { |
| test_sort_to_indices_primitive_arrays::<Int32Type>( |
| vec![None, None, Some(3), None, Some(1), None, Some(2)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![4, 6], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_boolean() { |
| // boolean |
| test_sort_to_indices_boolean_arrays( |
| vec![None, Some(false), Some(true), Some(true), Some(false), None], |
| None, |
| None, |
| vec![0, 5, 1, 4, 2, 3], |
| ); |
| |
| // boolean, descending |
| test_sort_to_indices_boolean_arrays( |
| vec![None, Some(false), Some(true), Some(true), Some(false), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 3, 1, 4, 5, 0], |
| ); |
| |
| // boolean, descending, nulls first |
| test_sort_to_indices_boolean_arrays( |
| vec![None, Some(false), Some(true), Some(true), Some(false), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![5, 0, 2, 3, 1, 4], |
| ); |
| |
| // boolean, descending, nulls first, limit |
| test_sort_to_indices_boolean_arrays( |
| vec![None, Some(false), Some(true), Some(true), Some(false), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![5, 0, 2], |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_to_indices_boolean_arrays( |
| vec![Some(true), None, None, Some(false)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![3, 0, 1], |
| ); |
| |
| test_sort_to_indices_boolean_arrays( |
| vec![Some(true), None, None, Some(false)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![1, 2, 3], |
| ); |
| |
| // more nulls than limit |
| test_sort_to_indices_boolean_arrays( |
| vec![Some(true), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![1, 2], |
| ); |
| |
| test_sort_to_indices_boolean_arrays( |
| vec![Some(true), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![0, 1], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_indices_decimal128() { |
| // decimal default |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| None, |
| None, |
| vec![0, 6, 4, 2, 3, 5, 1], |
| ); |
| // decimal descending |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![1, 5, 3, 2, 4, 6, 0], |
| ); |
| // decimal null_first and descending |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![6, 0, 1, 5, 3, 2, 4], |
| ); |
| // decimal null_first |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![0, 6, 4, 2, 3, 5, 1], |
| ); |
| // limit |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| None, |
| Some(3), |
| vec![0, 6, 4], |
| ); |
| // limit descending |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![1, 5, 3], |
| ); |
| // limit descending null_first |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![6, 0, 1], |
| ); |
| // limit null_first |
| test_sort_to_indices_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![0, 6, 4], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_indices_decimal256() { |
| // decimal default |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| None, |
| None, |
| vec![0, 6, 4, 2, 3, 5, 1], |
| ); |
| // decimal descending |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![1, 5, 3, 2, 4, 6, 0], |
| ); |
| // decimal null_first and descending |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![6, 0, 1, 5, 3, 2, 4], |
| ); |
| // decimal null_first |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![0, 6, 4, 2, 3, 5, 1], |
| ); |
| // limit |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| None, |
| Some(3), |
| vec![0, 6, 4], |
| ); |
| // limit descending |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![1, 5, 3], |
| ); |
| // limit descending null_first |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![6, 0, 1], |
| ); |
| // limit null_first |
| test_sort_to_indices_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![0, 6, 4], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_indices_decimal256_max_min() { |
| test_sort_to_indices_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![0, 1, 4, 2, 3], |
| ); |
| |
| test_sort_to_indices_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![0, 3, 2, 4, 1], |
| ); |
| |
| test_sort_to_indices_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(4), |
| vec![0, 1, 4, 2], |
| ); |
| |
| test_sort_to_indices_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(4), |
| vec![0, 3, 2, 4], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_decimal128() { |
| // decimal default |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| None, |
| None, |
| vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)], |
| ); |
| // decimal descending |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None], |
| ); |
| // decimal null_first and descending |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)], |
| ); |
| // decimal null_first |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)], |
| ); |
| // limit |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| None, |
| Some(3), |
| vec![None, None, Some(1)], |
| ); |
| // limit descending |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![Some(5), Some(4), Some(3)], |
| ); |
| // limit descending null_first |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(5)], |
| ); |
| // limit null_first |
| test_sort_decimal128_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(1)], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_decimal256() { |
| // decimal default |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| None, |
| None, |
| vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // decimal descending |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // decimal null_first and descending |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // decimal null_first |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // limit |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| None, |
| Some(3), |
| vec![None, None, Some(1)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // limit descending |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![Some(5), Some(4), Some(3)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // limit descending null_first |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(5)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| // limit null_first |
| test_sort_decimal256_array( |
| vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(1)] |
| .iter() |
| .map(|v| v.map(i256::from_i128)) |
| .collect(), |
| ); |
| } |
| |
| #[test] |
| fn test_sort_decimal256_max_min() { |
| test_sort_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(-1)), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| ], |
| ); |
| |
| test_sort_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| None, |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some(i256::MAX), |
| Some(i256::from_i128(1)), |
| Some(i256::from_i128(-1)), |
| Some(i256::MIN), |
| ], |
| ); |
| |
| test_sort_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(4), |
| vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))], |
| ); |
| |
| test_sort_decimal256_array( |
| vec![ |
| None, |
| Some(i256::MIN), |
| Some(i256::from_i128(1)), |
| Some(i256::MAX), |
| Some(i256::from_i128(-1)), |
| None, |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(4), |
| vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_primitives() { |
| // default case |
| test_sort_primitive_arrays::<UInt8Type>( |
| vec![None, Some(3), Some(5), Some(2), Some(3), None], |
| None, |
| None, |
| vec![None, None, Some(2), Some(3), Some(3), Some(5)], |
| ); |
| test_sort_primitive_arrays::<UInt16Type>( |
| vec![None, Some(3), Some(5), Some(2), Some(3), None], |
| None, |
| None, |
| vec![None, None, Some(2), Some(3), Some(3), Some(5)], |
| ); |
| test_sort_primitive_arrays::<UInt32Type>( |
| vec![None, Some(3), Some(5), Some(2), Some(3), None], |
| None, |
| None, |
| vec![None, None, Some(2), Some(3), Some(3), Some(5)], |
| ); |
| test_sort_primitive_arrays::<UInt64Type>( |
| vec![None, Some(3), Some(5), Some(2), Some(3), None], |
| None, |
| None, |
| vec![None, None, Some(2), Some(3), Some(3), Some(5)], |
| ); |
| |
| // descending |
| test_sort_primitive_arrays::<Int8Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(2), Some(0), Some(0), Some(-1), None, None], |
| ); |
| test_sort_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(2), Some(0), Some(0), Some(-1), None, None], |
| ); |
| test_sort_primitive_arrays::<Int32Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(2), Some(0), Some(0), Some(-1), None, None], |
| ); |
| test_sort_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(2), Some(0), Some(0), Some(-1), None, None], |
| ); |
| |
| // descending, nulls first |
| test_sort_primitive_arrays::<Int8Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(2), Some(0), Some(0), Some(-1)], |
| ); |
| test_sort_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(2), Some(0), Some(0), Some(-1)], |
| ); |
| test_sort_primitive_arrays::<Int32Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(2), Some(0), Some(0), Some(-1)], |
| ); |
| test_sort_primitive_arrays::<Int64Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(2), Some(0), Some(0), Some(-1)], |
| ); |
| |
| test_sort_primitive_arrays::<Int64Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(2)], |
| ); |
| |
| test_sort_primitive_arrays::<Float32Type>( |
| vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)], |
| ); |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)], |
| ); |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)], |
| ); |
| |
| // int8 nulls first |
| test_sort_primitive_arrays::<Int8Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(-1), Some(0), Some(0), Some(2)], |
| ); |
| test_sort_primitive_arrays::<Int16Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(-1), Some(0), Some(0), Some(2)], |
| ); |
| test_sort_primitive_arrays::<Int32Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(-1), Some(0), Some(0), Some(2)], |
| ); |
| test_sort_primitive_arrays::<Int64Type>( |
| vec![None, Some(0), Some(2), Some(-1), Some(0), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(-1), Some(0), Some(0), Some(2)], |
| ); |
| test_sort_primitive_arrays::<Float32Type>( |
| vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)], |
| ); |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)], |
| ); |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)], |
| ); |
| |
| // limit |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![Some(1.0), Some(f64::NAN)], |
| ); |
| |
| // limit with actual value |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![Some(1.0), Some(2.0), Some(3.0)], |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), None, None, Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![Some(1.0), Some(2.0), None], |
| ); |
| |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), None, None, Some(1.0)], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(1.0)], |
| ); |
| |
| // more nulls than limit |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![None, None], |
| ); |
| |
| test_sort_primitive_arrays::<Float64Type>( |
| vec![Some(2.0), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![Some(2.0), None], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_to_indices_strings() { |
| test_sort_to_indices_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| None, |
| None, |
| vec![0, 3, 5, 1, 4, 2], |
| ); |
| |
| test_sort_to_indices_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![2, 4, 1, 5, 3, 0], |
| ); |
| |
| test_sort_to_indices_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![0, 3, 5, 1, 4, 2], |
| ); |
| |
| test_sort_to_indices_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![3, 0, 2, 4, 1, 5], |
| ); |
| |
| test_sort_to_indices_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![3, 0, 2], |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_to_indices_string_arrays( |
| vec![Some("def"), None, None, Some("abc")], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![3, 0, 1], |
| ); |
| |
| test_sort_to_indices_string_arrays( |
| vec![Some("def"), None, None, Some("abc")], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![1, 2, 3], |
| ); |
| |
| // more nulls than limit |
| test_sort_to_indices_string_arrays( |
| vec![Some("def"), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![1, 2], |
| ); |
| |
| test_sort_to_indices_string_arrays( |
| vec![Some("def"), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![0, 1], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_strings() { |
| test_sort_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| None, |
| None, |
| vec![ |
| None, |
| None, |
| Some("-ad"), |
| Some("bad"), |
| Some("glad"), |
| Some("sad"), |
| ], |
| ); |
| |
| test_sort_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some("sad"), |
| Some("glad"), |
| Some("bad"), |
| Some("-ad"), |
| None, |
| None, |
| ], |
| ); |
| |
| test_sort_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some("-ad"), |
| Some("bad"), |
| Some("glad"), |
| Some("sad"), |
| ], |
| ); |
| |
| test_sort_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some("sad"), |
| Some("glad"), |
| Some("bad"), |
| Some("-ad"), |
| ], |
| ); |
| |
| test_sort_string_arrays( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some("sad")], |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_string_arrays( |
| vec![Some("def"), None, None, Some("abc")], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![Some("abc"), Some("def"), None], |
| ); |
| |
| test_sort_string_arrays( |
| vec![Some("def"), None, None, Some("abc")], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some("abc")], |
| ); |
| |
| // more nulls than limit |
| test_sort_string_arrays( |
| vec![Some("def"), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![None, None], |
| ); |
| |
| test_sort_string_arrays( |
| vec![Some("def"), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![Some("def"), None], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_string_dicts() { |
| test_sort_string_dict_arrays::<Int8Type>( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| None, |
| None, |
| vec![ |
| None, |
| None, |
| Some("-ad"), |
| Some("bad"), |
| Some("glad"), |
| Some("sad"), |
| ], |
| ); |
| |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some("sad"), |
| Some("glad"), |
| Some("bad"), |
| Some("-ad"), |
| None, |
| None, |
| ], |
| ); |
| |
| test_sort_string_dict_arrays::<Int32Type>( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some("-ad"), |
| Some("bad"), |
| Some("glad"), |
| Some("sad"), |
| ], |
| ); |
| |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some("sad"), |
| Some("glad"), |
| Some("bad"), |
| Some("-ad"), |
| ], |
| ); |
| |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![ |
| None, |
| Some("bad"), |
| Some("sad"), |
| None, |
| Some("glad"), |
| Some("-ad"), |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some("sad")], |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![Some("def"), None, None, Some("abc")], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![Some("abc"), Some("def"), None], |
| ); |
| |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![Some("def"), None, None, Some("abc")], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some("abc")], |
| ); |
| |
| // more nulls than limit |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![Some("def"), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![None, None], |
| ); |
| |
| test_sort_string_dict_arrays::<Int16Type>( |
| vec![Some("def"), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![Some("def"), None], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_list() { |
| test_sort_list_arrays::<Int8Type>( |
| vec![ |
| Some(vec![Some(1)]), |
| Some(vec![Some(4)]), |
| Some(vec![Some(2)]), |
| Some(vec![Some(3)]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![Some(1)]), |
| Some(vec![Some(2)]), |
| Some(vec![Some(3)]), |
| Some(vec![Some(4)]), |
| ], |
| Some(1), |
| ); |
| |
| test_sort_list_arrays::<Float32Type>( |
| vec![ |
| Some(vec![Some(1.0), Some(0.0)]), |
| Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]), |
| Some(vec![Some(2.0), Some(3.0), Some(4.0)]), |
| Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]), |
| Some(vec![Some(1.0), Some(1.0)]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![Some(1.0), Some(0.0)]), |
| Some(vec![Some(1.0), Some(1.0)]), |
| Some(vec![Some(2.0), Some(3.0), Some(4.0)]), |
| Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]), |
| Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]), |
| ], |
| None, |
| ); |
| |
| test_sort_list_arrays::<Float64Type>( |
| vec![ |
| Some(vec![Some(1.0), Some(0.0)]), |
| Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]), |
| Some(vec![Some(2.0), Some(3.0), Some(4.0)]), |
| Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]), |
| Some(vec![Some(1.0), Some(1.0)]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![Some(1.0), Some(0.0)]), |
| Some(vec![Some(1.0), Some(1.0)]), |
| Some(vec![Some(2.0), Some(3.0), Some(4.0)]), |
| Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]), |
| Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]), |
| ], |
| None, |
| ); |
| |
| test_sort_list_arrays::<Int32Type>( |
| vec![ |
| Some(vec![Some(1), Some(0)]), |
| Some(vec![Some(4), Some(3), Some(2), Some(1)]), |
| Some(vec![Some(2), Some(3), Some(4)]), |
| Some(vec![Some(3), Some(3), Some(3), Some(3)]), |
| Some(vec![Some(1), Some(1)]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![Some(1), Some(0)]), |
| Some(vec![Some(1), Some(1)]), |
| Some(vec![Some(2), Some(3), Some(4)]), |
| Some(vec![Some(3), Some(3), Some(3), Some(3)]), |
| Some(vec![Some(4), Some(3), Some(2), Some(1)]), |
| ], |
| None, |
| ); |
| |
| test_sort_list_arrays::<Int32Type>( |
| vec![ |
| None, |
| Some(vec![Some(4), None, Some(2)]), |
| Some(vec![Some(2), Some(3), Some(4)]), |
| None, |
| Some(vec![Some(3), Some(3), None]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![Some(2), Some(3), Some(4)]), |
| Some(vec![Some(3), Some(3), None]), |
| Some(vec![Some(4), None, Some(2)]), |
| None, |
| None, |
| ], |
| Some(3), |
| ); |
| |
| test_sort_list_arrays::<Int32Type>( |
| vec![ |
| Some(vec![Some(1), Some(0)]), |
| Some(vec![Some(4), Some(3), Some(2), Some(1)]), |
| Some(vec![Some(2), Some(3), Some(4)]), |
| Some(vec![Some(3), Some(3), Some(3), Some(3)]), |
| Some(vec![Some(1), Some(1)]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])], |
| None, |
| ); |
| |
| // valid values less than limit with extra nulls |
| test_sort_list_arrays::<Int32Type>( |
| vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(3), |
| vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None], |
| None, |
| ); |
| |
| test_sort_list_arrays::<Int32Type>( |
| vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(vec![Some(1)])], |
| None, |
| ); |
| |
| // more nulls than limit |
| test_sort_list_arrays::<Int32Type>( |
| vec![Some(vec![Some(1)]), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(2), |
| vec![None, None], |
| None, |
| ); |
| |
| test_sort_list_arrays::<Int32Type>( |
| vec![Some(vec![Some(1)]), None, None, None], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| Some(2), |
| vec![Some(vec![Some(1)]), None], |
| None, |
| ); |
| } |
| |
| #[test] |
| fn test_sort_binary() { |
| test_sort_binary_arrays( |
| vec![ |
| Some(vec![0, 0, 0]), |
| Some(vec![0, 0, 5]), |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| Some(vec![0, 0, 1]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![0, 0, 0]), |
| Some(vec![0, 0, 1]), |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 5]), |
| Some(vec![0, 0, 7]), |
| ], |
| Some(3), |
| ); |
| |
| // with nulls |
| test_sort_binary_arrays( |
| vec![ |
| Some(vec![0, 0, 0]), |
| None, |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| Some(vec![0, 0, 1]), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![0, 0, 0]), |
| Some(vec![0, 0, 1]), |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| None, |
| None, |
| ], |
| Some(3), |
| ); |
| |
| test_sort_binary_arrays( |
| vec![ |
| Some(vec![3, 5, 7]), |
| None, |
| Some(vec![1, 7, 1]), |
| Some(vec![2, 7, 3]), |
| None, |
| Some(vec![1, 4, 3]), |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![1, 4, 3]), |
| Some(vec![1, 7, 1]), |
| Some(vec![2, 7, 3]), |
| Some(vec![3, 5, 7]), |
| None, |
| None, |
| ], |
| Some(3), |
| ); |
| |
| // descending |
| test_sort_binary_arrays( |
| vec![ |
| Some(vec![0, 0, 0]), |
| None, |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| Some(vec![0, 0, 1]), |
| None, |
| ], |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(vec![0, 0, 7]), |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 1]), |
| Some(vec![0, 0, 0]), |
| None, |
| None, |
| ], |
| Some(3), |
| ); |
| |
| // nulls first |
| test_sort_binary_arrays( |
| vec![ |
| Some(vec![0, 0, 0]), |
| None, |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| Some(vec![0, 0, 1]), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| None, |
| vec![ |
| None, |
| None, |
| Some(vec![0, 0, 0]), |
| Some(vec![0, 0, 1]), |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| ], |
| Some(3), |
| ); |
| |
| // limit |
| test_sort_binary_arrays( |
| vec![ |
| Some(vec![0, 0, 0]), |
| None, |
| Some(vec![0, 0, 3]), |
| Some(vec![0, 0, 7]), |
| Some(vec![0, 0, 1]), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(4), |
| vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])], |
| Some(3), |
| ); |
| |
| // var length |
| test_sort_binary_arrays( |
| vec![ |
| Some(b"Hello".to_vec()), |
| None, |
| Some(b"from".to_vec()), |
| Some(b"Apache".to_vec()), |
| Some(b"Arrow-rs".to_vec()), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![ |
| Some(b"Apache".to_vec()), |
| Some(b"Arrow-rs".to_vec()), |
| Some(b"Hello".to_vec()), |
| Some(b"from".to_vec()), |
| None, |
| None, |
| ], |
| None, |
| ); |
| |
| // limit |
| test_sort_binary_arrays( |
| vec![ |
| Some(b"Hello".to_vec()), |
| None, |
| Some(b"from".to_vec()), |
| Some(b"Apache".to_vec()), |
| Some(b"Arrow-rs".to_vec()), |
| None, |
| ], |
| Some(SortOptions { |
| descending: false, |
| nulls_first: true, |
| }), |
| Some(4), |
| vec![ |
| None, |
| None, |
| Some(b"Apache".to_vec()), |
| Some(b"Arrow-rs".to_vec()), |
| ], |
| None, |
| ); |
| } |
| |
| #[test] |
| fn test_lex_sort_single_column() { |
| let input = vec![SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(17), |
| Some(2), |
| Some(-1), |
| Some(0), |
| ])) as ArrayRef, |
| options: None, |
| }]; |
| let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(-1), |
| Some(0), |
| Some(2), |
| Some(17), |
| ])) as ArrayRef]; |
| test_lex_sort_arrays(input.clone(), expected.clone(), None); |
| test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2)); |
| |
| // Explicitly test a limit on the sort as a demonstration |
| let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(-1), |
| Some(0), |
| Some(2), |
| ])) as ArrayRef]; |
| test_lex_sort_arrays(input, expected, Some(3)); |
| } |
| |
| #[test] |
| fn test_lex_sort_unaligned_rows() { |
| let input = vec![ |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)])) |
| as ArrayRef, |
| options: None, |
| }, |
| SortColumn { |
| values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef, |
| options: None, |
| }, |
| ]; |
| assert!( |
| lexsort(&input, None).is_err(), |
| "lexsort should reject columns with different row counts" |
| ); |
| } |
| |
| #[test] |
| fn test_lex_sort_mixed_types() { |
| let input = vec![ |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(0), |
| Some(2), |
| Some(-1), |
| Some(0), |
| ])) as ArrayRef, |
| options: None, |
| }, |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![ |
| Some(101), |
| Some(8), |
| Some(7), |
| Some(102), |
| ])) as ArrayRef, |
| options: None, |
| }, |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(-1), |
| Some(-2), |
| Some(-3), |
| Some(-4), |
| ])) as ArrayRef, |
| options: None, |
| }, |
| ]; |
| let expected = vec![ |
| Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(-1), |
| Some(0), |
| Some(0), |
| Some(2), |
| ])) as ArrayRef, |
| Arc::new(PrimitiveArray::<UInt32Type>::from(vec![ |
| Some(7), |
| Some(101), |
| Some(102), |
| Some(8), |
| ])) as ArrayRef, |
| Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(-3), |
| Some(-1), |
| Some(-4), |
| Some(-2), |
| ])) as ArrayRef, |
| ]; |
| test_lex_sort_arrays(input.clone(), expected.clone(), None); |
| test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); |
| |
| // test mix of string and in64 with option |
| let input = vec![ |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(0), |
| Some(2), |
| Some(-1), |
| Some(0), |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| }, |
| SortColumn { |
| values: Arc::new(StringArray::from(vec![ |
| Some("foo"), |
| Some("9"), |
| Some("7"), |
| Some("bar"), |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| }, |
| ]; |
| let expected = vec![ |
| Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(2), |
| Some(0), |
| Some(0), |
| Some(-1), |
| ])) as ArrayRef, |
| Arc::new(StringArray::from(vec![ |
| Some("9"), |
| Some("foo"), |
| Some("bar"), |
| Some("7"), |
| ])) as ArrayRef, |
| ]; |
| test_lex_sort_arrays(input.clone(), expected.clone(), None); |
| test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3)); |
| |
| // test sort with nulls first |
| let input = vec![ |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| None, |
| Some(-1), |
| Some(2), |
| None, |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| }, |
| SortColumn { |
| values: Arc::new(StringArray::from(vec![ |
| Some("foo"), |
| Some("world"), |
| Some("hello"), |
| None, |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| }, |
| ]; |
| let expected = vec![ |
| Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| None, |
| None, |
| Some(2), |
| Some(-1), |
| ])) as ArrayRef, |
| Arc::new(StringArray::from(vec![ |
| None, |
| Some("foo"), |
| Some("hello"), |
| Some("world"), |
| ])) as ArrayRef, |
| ]; |
| test_lex_sort_arrays(input.clone(), expected.clone(), None); |
| test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1)); |
| |
| // test sort with nulls last |
| let input = vec![ |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| None, |
| Some(-1), |
| Some(2), |
| None, |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| }, |
| SortColumn { |
| values: Arc::new(StringArray::from(vec![ |
| Some("foo"), |
| Some("world"), |
| Some("hello"), |
| None, |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| }, |
| ]; |
| let expected = vec![ |
| Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(2), |
| Some(-1), |
| None, |
| None, |
| ])) as ArrayRef, |
| Arc::new(StringArray::from(vec![ |
| Some("hello"), |
| Some("world"), |
| Some("foo"), |
| None, |
| ])) as ArrayRef, |
| ]; |
| test_lex_sort_arrays(input.clone(), expected.clone(), None); |
| test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); |
| |
| // test sort with opposite options |
| let input = vec![ |
| SortColumn { |
| values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| None, |
| Some(-1), |
| Some(2), |
| Some(-1), |
| None, |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| }, |
| SortColumn { |
| values: Arc::new(StringArray::from(vec![ |
| Some("foo"), |
| Some("bar"), |
| Some("world"), |
| Some("hello"), |
| None, |
| ])) as ArrayRef, |
| options: Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| }, |
| ]; |
| let expected = vec![ |
| Arc::new(PrimitiveArray::<Int64Type>::from(vec![ |
| Some(-1), |
| Some(-1), |
| Some(2), |
| None, |
| None, |
| ])) as ArrayRef, |
| Arc::new(StringArray::from(vec![ |
| Some("hello"), |
| Some("bar"), |
| Some("world"), |
| None, |
| Some("foo"), |
| ])) as ArrayRef, |
| ]; |
| test_lex_sort_arrays(input.clone(), expected.clone(), None); |
| test_lex_sort_arrays( |
| input.clone(), |
| slice_arrays(expected.clone(), 0, 5), |
| Some(5), |
| ); |
| |
| // Limiting by more rows than present is ok |
| test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10)); |
| } |
| |
| #[test] |
| fn test_partial_sort() { |
| let mut before: Vec<&str> = vec![ |
| "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf", |
| ]; |
| let mut d = before.clone(); |
| d.sort_unstable(); |
| |
| for last in 0..before.len() { |
| partial_sort(&mut before, last, |a, b| a.cmp(b)); |
| assert_eq!(&d[0..last], &before.as_slice()[0..last]); |
| } |
| } |
| |
| #[test] |
| fn test_partial_rand_sort() { |
| let size = 1000u32; |
| let mut rng = StdRng::seed_from_u64(42); |
| let mut before: Vec<u32> = (0..size).map(|_| rng.gen::<u32>()).collect(); |
| let mut d = before.clone(); |
| let last = (rng.next_u32() % size) as usize; |
| d.sort_unstable(); |
| |
| partial_sort(&mut before, last, |a, b| a.cmp(b)); |
| assert_eq!(&d[0..last], &before[0..last]); |
| } |
| |
| #[test] |
| fn test_sort_int8_dicts() { |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Int8Array::from(vec![1, 3, 5]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| None, |
| None, |
| vec![None, None, Some(1), Some(3), Some(5), Some(5)], |
| ); |
| |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Int8Array::from(vec![1, 3, 5]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(5), Some(5), Some(3), Some(1), None, None], |
| ); |
| |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Int8Array::from(vec![1, 3, 5]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(1), Some(3), Some(5), Some(5), None, None], |
| ); |
| |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Int8Array::from(vec![1, 3, 5]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(5)], |
| ); |
| |
| // Values have `None`. |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| None, |
| None, |
| vec![None, None, None, Some(1), Some(3), Some(5), Some(5)], |
| ); |
| |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(1), Some(3), Some(5), Some(5), None, None, None], |
| ); |
| |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(5), Some(5), Some(3), Some(1), None, None, None], |
| ); |
| |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Int8Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, None, Some(5), Some(5), Some(3), Some(1)], |
| ); |
| } |
| |
| #[test] |
| fn test_sort_f32_dicts() { |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Float32Array::from(vec![1.2, 3.0, 5.1]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| None, |
| None, |
| vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)], |
| ); |
| |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Float32Array::from(vec![1.2, 3.0, 5.1]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None], |
| ); |
| |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Float32Array::from(vec![1.2, 3.0, 5.1]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None], |
| ); |
| |
| let keys = |
| Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]); |
| let values = Float32Array::from(vec![1.2, 3.0, 5.1]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| Some(3), |
| vec![None, None, Some(5.1)], |
| ); |
| |
| // Values have `None`. |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| None, |
| None, |
| vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)], |
| ); |
| |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: false, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None], |
| ); |
| |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: false, |
| }), |
| None, |
| vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None], |
| ); |
| |
| let keys = Int8Array::from(vec![ |
| Some(1_i8), |
| None, |
| Some(3), |
| None, |
| Some(2), |
| Some(3), |
| Some(0), |
| ]); |
| let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]); |
| test_sort_primitive_dict_arrays::<Int8Type, Float32Type>( |
| keys, |
| values, |
| Some(SortOptions { |
| descending: true, |
| nulls_first: true, |
| }), |
| None, |
| vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)], |
| ); |
| } |
| } |