cpp/src/arrow/compute/kernels/aggregate_test.cc - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <algorithm>
 #include <limits>
 #include <memory>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>

 #include <gtest/gtest.h>

 #include "arrow/array.h"
 #include "arrow/chunked_array.h"
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/compute/registry.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bitmap_reader.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/int_util_internal.h"

 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/util/logging.h"

 namespace arrow {

 using internal::BitmapReader;
 using internal::checked_cast;
 using internal::checked_pointer_cast;

 namespace compute {

 //
 // Sum
 //

 template <typename ArrowType>
 using SumResult =
     std::pair<typename FindAccumulatorType<ArrowType>::Type::c_type, size_t>;

 template <typename ArrowType>
 static SumResult<ArrowType> NaiveSumPartial(const Array& array) {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
   using ResultType = SumResult<ArrowType>;

   ResultType result;

   auto data = array.data();
   const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);
   const auto values = array_numeric.raw_values();

   if (array.null_count() != 0) {
     BitmapReader reader(array.null_bitmap_data(), array.offset(), array.length());
     for (int64_t i = 0; i < array.length(); i++) {
       if (reader.IsSet()) {
         result.first += values[i];
         result.second++;
       }

       reader.Next();
     }
   } else {
     for (int64_t i = 0; i < array.length(); i++) {
       result.first += values[i];
       result.second++;
     }
   }

   return result;
 }

 template <typename ArrowType>
 static Datum NaiveSum(const Array& array) {
   using SumType = typename FindAccumulatorType<ArrowType>::Type;
   using SumScalarType = typename TypeTraits<SumType>::ScalarType;

   auto result = NaiveSumPartial<ArrowType>(array);
   bool is_valid = result.second > 0;

   if (!is_valid) return Datum(std::make_shared<SumScalarType>());
   return Datum(std::make_shared<SumScalarType>(result.first));
 }

 template <typename ArrowType>
 void ValidateSum(const Array& input, Datum expected) {
   using OutputType = typename FindAccumulatorType<ArrowType>::Type;

   ASSERT_OK_AND_ASSIGN(Datum result, Sum(input));
   DatumEqual<OutputType>::EnsureEqual(result, expected);
 }

 template <typename ArrowType>
 void ValidateSum(const std::shared_ptr<ChunkedArray>& input, Datum expected) {
   using OutputType = typename FindAccumulatorType<ArrowType>::Type;

   ASSERT_OK_AND_ASSIGN(Datum result, Sum(input));
   DatumEqual<OutputType>::EnsureEqual(result, expected);
 }

 template <typename ArrowType>
 void ValidateSum(const char* json, Datum expected) {
   auto array = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
   ValidateSum<ArrowType>(*array, expected);
 }

 template <typename ArrowType>
 void ValidateSum(const std::vector<std::string>& json, Datum expected) {
   auto array = ChunkedArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
   ValidateSum<ArrowType>(array, expected);
 }

 template <typename ArrowType>
 void ValidateSum(const Array& array) {
   ValidateSum<ArrowType>(array, NaiveSum<ArrowType>(array));
 }

 using UnaryOp = Result<Datum>(const Datum&, ExecContext*);

 template <UnaryOp& Op, typename ScalarType>
 void ValidateBooleanAgg(const std::string& json,
                         const std::shared_ptr<ScalarType>& expected) {
   auto array = ArrayFromJSON(boolean(), json);
   auto exp = Datum(expected);
   ASSERT_OK_AND_ASSIGN(Datum result, Op(array, nullptr));
   ASSERT_TRUE(result.Equals(exp));
 }

 TEST(TestBooleanAggregation, Sum) {
   ValidateBooleanAgg<Sum>("[]", std::make_shared<UInt64Scalar>());
   ValidateBooleanAgg<Sum>("[null]", std::make_shared<UInt64Scalar>());
   ValidateBooleanAgg<Sum>("[null, false]", std::make_shared<UInt64Scalar>(0));
   ValidateBooleanAgg<Sum>("[true]", std::make_shared<UInt64Scalar>(1));
   ValidateBooleanAgg<Sum>("[true, false, true]", std::make_shared<UInt64Scalar>(2));
   ValidateBooleanAgg<Sum>("[true, false, true, true, null]",
                           std::make_shared<UInt64Scalar>(3));
 }

 TEST(TestBooleanAggregation, Mean) {
   ValidateBooleanAgg<Mean>("[]", std::make_shared<DoubleScalar>());
   ValidateBooleanAgg<Mean>("[null]", std::make_shared<DoubleScalar>());
   ValidateBooleanAgg<Mean>("[null, false]", std::make_shared<DoubleScalar>(0));
   ValidateBooleanAgg<Mean>("[true]", std::make_shared<DoubleScalar>(1));
   ValidateBooleanAgg<Mean>("[true, false, true, false]",
                            std::make_shared<DoubleScalar>(0.5));
   ValidateBooleanAgg<Mean>("[true, null]", std::make_shared<DoubleScalar>(1));
   ValidateBooleanAgg<Mean>("[true, null, false, true, true]",
                            std::make_shared<DoubleScalar>(0.75));
   ValidateBooleanAgg<Mean>("[true, null, false, false, false]",
                            std::make_shared<DoubleScalar>(0.25));
 }

 template <typename ArrowType>
 class TestNumericSumKernel : public ::testing::Test {};

 TYPED_TEST_SUITE(TestNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestNumericSumKernel, SimpleSum) {
   using SumType = typename FindAccumulatorType<TypeParam>::Type;
   using ScalarType = typename TypeTraits<SumType>::ScalarType;
   using T = typename TypeParam::c_type;

   ValidateSum<TypeParam>("[]", Datum(std::make_shared<ScalarType>()));

   ValidateSum<TypeParam>("[null]", Datum(std::make_shared<ScalarType>()));

   ValidateSum<TypeParam>("[0, 1, 2, 3, 4, 5]",
                          Datum(std::make_shared<ScalarType>(static_cast<T>(5 * 6 / 2))));

   std::vector<std::string> chunks = {"[0, 1, 2, 3, 4, 5]"};
   ValidateSum<TypeParam>(chunks,
                          Datum(std::make_shared<ScalarType>(static_cast<T>(5 * 6 / 2))));

   chunks = {"[0, 1, 2]", "[3, 4, 5]"};
   ValidateSum<TypeParam>(chunks,
                          Datum(std::make_shared<ScalarType>(static_cast<T>(5 * 6 / 2))));

   chunks = {"[0, 1, 2]", "[]", "[3, 4, 5]"};
   ValidateSum<TypeParam>(chunks,
                          Datum(std::make_shared<ScalarType>(static_cast<T>(5 * 6 / 2))));

   chunks = {};
   ValidateSum<TypeParam>(chunks,
                          Datum(std::make_shared<ScalarType>()));  // null

   const T expected_result = static_cast<T>(14);
   ValidateSum<TypeParam>("[1, null, 3, null, 3, null, 7]",
                          Datum(std::make_shared<ScalarType>(expected_result)));
 }

 template <typename ArrowType>
 class TestRandomNumericSumKernel : public ::testing::Test {};

 TYPED_TEST_SUITE(TestRandomNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericSumKernel, RandomArraySum) {
   auto rand = random::RandomArrayGenerator(0x5487655);
   // Test size up to 1<<13 (8192).
   for (size_t i = 3; i < 14; i += 2) {
     for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
         ValidateSum<TypeParam>(*array);
       }
     }
   }
 }

 TYPED_TEST_SUITE(TestRandomNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericSumKernel, RandomArraySumOverflow) {
   using CType = typename TypeParam::c_type;
   using SumCType = typename FindAccumulatorType<TypeParam>::Type::c_type;
   if (sizeof(CType) == sizeof(SumCType)) {
     // Skip if accumulator type is same to original type
     return;
   }

   CType max = std::numeric_limits<CType>::max();
   CType min = std::numeric_limits<CType>::min();
   int64_t length = 1024;

   auto rand = random::RandomArrayGenerator(0x5487655);
   for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
     // Test overflow on the original type
     auto array = rand.Numeric<TypeParam>(length, max - 200, max - 100, null_probability);
     ValidateSum<TypeParam>(*array);
     array = rand.Numeric<TypeParam>(length, min + 100, min + 200, null_probability);
     ValidateSum<TypeParam>(*array);
   }
 }

 TYPED_TEST(TestRandomNumericSumKernel, RandomSliceArraySum) {
   auto arithmetic = ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(),
                                   "[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]");
   ValidateSum<TypeParam>(*arithmetic);
   for (size_t i = 1; i < 15; i++) {
     auto slice = arithmetic->Slice(i, 16);
     ValidateSum<TypeParam>(*slice);
   }

   // Trigger ConsumeSparse with different slice offsets.
   auto rand = random::RandomArrayGenerator(0xfa432643);
   const int64_t length = 1U << 5;
   auto array = rand.Numeric<TypeParam>(length, 0, 10, 0.5);
   for (size_t i = 1; i < 16; i++) {
     for (size_t j = 1; j < 16; j++) {
       auto slice = array->Slice(i, length - j);
       ValidateSum<TypeParam>(*slice);
     }
   }
 }

 // Test round-off error
 class TestSumKernelRoundOff : public ::testing::Test {};

 TEST_F(TestSumKernelRoundOff, Basics) {
   using ScalarType = typename TypeTraits<DoubleType>::ScalarType;

   // array = np.arange(321000, dtype='float64')
   // array -= np.mean(array)
   // array *= arrray
   double index = 0;
   ASSERT_OK_AND_ASSIGN(
       auto array, ArrayFromBuilderVisitor(
                       float64(), 321000, [&](NumericBuilder<DoubleType>* builder) {
                         builder->UnsafeAppend((index - 160499.5) * (index - 160499.5));
                         ++index;
                       }));

   // reference value from numpy.sum()
   ASSERT_OK_AND_ASSIGN(Datum result, Sum(array));
   auto sum = checked_cast<const ScalarType*>(result.scalar().get());
   ASSERT_EQ(sum->value, 2756346749973250.0);
 }

 //
 // Count
 //

 using CountPair = std::pair<int64_t, int64_t>;

 static CountPair NaiveCount(const Array& array) {
   CountPair count;

   count.first = array.length() - array.null_count();
   count.second = array.null_count();

   return count;
 }

 void ValidateCount(const Array& input, CountPair expected) {
   CountOptions all = CountOptions(CountOptions::COUNT_NON_NULL);
   CountOptions nulls = CountOptions(CountOptions::COUNT_NULL);

   ASSERT_OK_AND_ASSIGN(Datum result, Count(input, all));
   AssertDatumsEqual(result, Datum(expected.first));

   ASSERT_OK_AND_ASSIGN(result, Count(input, nulls));
   AssertDatumsEqual(result, Datum(expected.second));
 }

 template <typename ArrowType>
 void ValidateCount(const char* json, CountPair expected) {
   auto array = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
   ValidateCount(*array, expected);
 }

 void ValidateCount(const Array& input) { ValidateCount(input, NaiveCount(input)); }

 template <typename ArrowType>
 class TestCountKernel : public ::testing::Test {};

 TYPED_TEST_SUITE(TestCountKernel, NumericArrowTypes);
 TYPED_TEST(TestCountKernel, SimpleCount) {
   ValidateCount<TypeParam>("[]", {0, 0});
   ValidateCount<TypeParam>("[null]", {0, 1});
   ValidateCount<TypeParam>("[1, null, 2]", {2, 1});
   ValidateCount<TypeParam>("[null, null, null]", {0, 3});
   ValidateCount<TypeParam>("[1, 2, 3, 4, 5, 6, 7, 8, 9]", {9, 0});
 }

 template <typename ArrowType>
 class TestRandomNumericCountKernel : public ::testing::Test {};

 TYPED_TEST_SUITE(TestRandomNumericCountKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericCountKernel, RandomArrayCount) {
   auto rand = random::RandomArrayGenerator(0x1205643);
   for (size_t i = 3; i < 10; i++) {
     for (auto null_probability : {0.0, 0.01, 0.1, 0.25, 0.5, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
         ValidateCount(*array);
       }
     }
   }
 }

 //
 // Mean
 //

 template <typename ArrowType>
 static Datum NaiveMean(const Array& array) {
   using MeanScalarType = typename TypeTraits<DoubleType>::ScalarType;

   const auto result = NaiveSumPartial<ArrowType>(array);
   const double mean = static_cast<double>(result.first) /
                       static_cast<double>(result.second ? result.second : 1UL);
   const bool is_valid = result.second > 0;

   if (!is_valid) return Datum(std::make_shared<MeanScalarType>());
   return Datum(std::make_shared<MeanScalarType>(mean));
 }

 template <typename ArrowType>
 void ValidateMean(const Array& input, Datum expected) {
   using OutputType = typename FindAccumulatorType<DoubleType>::Type;

   ASSERT_OK_AND_ASSIGN(Datum result, Mean(input));
   DatumEqual<OutputType>::EnsureEqual(result, expected);
 }

 template <typename ArrowType>
 void ValidateMean(const char* json, Datum expected) {
   auto array = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
   ValidateMean<ArrowType>(*array, expected);
 }

 template <typename ArrowType>
 void ValidateMean(const Array& array) {
   ValidateMean<ArrowType>(array, NaiveMean<ArrowType>(array));
 }

 template <typename ArrowType>
 class TestMeanKernelNumeric : public ::testing::Test {};

 TYPED_TEST_SUITE(TestMeanKernelNumeric, NumericArrowTypes);
 TYPED_TEST(TestMeanKernelNumeric, SimpleMean) {
   using ScalarType = typename TypeTraits<DoubleType>::ScalarType;

   ValidateMean<TypeParam>("[]", Datum(std::make_shared<ScalarType>()));

   ValidateMean<TypeParam>("[null]", Datum(std::make_shared<ScalarType>()));

   ValidateMean<TypeParam>("[1, null, 1]", Datum(std::make_shared<ScalarType>(1.0)));

   ValidateMean<TypeParam>("[1, 2, 3, 4, 5, 6, 7, 8]",
                           Datum(std::make_shared<ScalarType>(4.5)));

   ValidateMean<TypeParam>("[0, 0, 0, 0, 0, 0, 0, 0]",
                           Datum(std::make_shared<ScalarType>(0.0)));

   ValidateMean<TypeParam>("[1, 1, 1, 1, 1, 1, 1, 1]",
                           Datum(std::make_shared<ScalarType>(1.0)));
 }

 template <typename ArrowType>
 class TestRandomNumericMeanKernel : public ::testing::Test {};

 TYPED_TEST_SUITE(TestRandomNumericMeanKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericMeanKernel, RandomArrayMean) {
   auto rand = random::RandomArrayGenerator(0x8afc055);
   // Test size up to 1<<13 (8192).
   for (size_t i = 3; i < 14; i += 2) {
     for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
         ValidateMean<TypeParam>(*array);
       }
     }
   }
 }

 TYPED_TEST_SUITE(TestRandomNumericMeanKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericMeanKernel, RandomArrayMeanOverflow) {
   using CType = typename TypeParam::c_type;
   using SumCType = typename FindAccumulatorType<TypeParam>::Type::c_type;
   if (sizeof(CType) == sizeof(SumCType)) {
     // Skip if accumulator type is same to original type
     return;
   }

   CType max = std::numeric_limits<CType>::max();
   CType min = std::numeric_limits<CType>::min();
   int64_t length = 1024;

   auto rand = random::RandomArrayGenerator(0x8afc055);
   for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
     // Test overflow on the original type
     auto array = rand.Numeric<TypeParam>(length, max - 200, max - 100, null_probability);
     ValidateMean<TypeParam>(*array);
     array = rand.Numeric<TypeParam>(length, min + 100, min + 200, null_probability);
     ValidateMean<TypeParam>(*array);
   }
 }

 //
 // Min / Max
 //

 template <typename ArrowType>
 class TestPrimitiveMinMaxKernel : public ::testing::Test {
   using Traits = TypeTraits<ArrowType>;
   using ArrayType = typename Traits::ArrayType;
   using c_type = typename ArrowType::c_type;
   using ScalarType = typename Traits::ScalarType;

  public:
   void AssertMinMaxIs(const Datum& array, c_type expected_min, c_type expected_max,
                       const MinMaxOptions& options) {
     ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array, options));
     const StructScalar& value = out.scalar_as<StructScalar>();

     const auto& out_min = checked_cast<const ScalarType&>(*value.value[0]);
     ASSERT_EQ(expected_min, out_min.value);

     const auto& out_max = checked_cast<const ScalarType&>(*value.value[1]);
     ASSERT_EQ(expected_max, out_max.value);
   }

   void AssertMinMaxIs(const std::string& json, c_type expected_min, c_type expected_max,
                       const MinMaxOptions& options) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertMinMaxIs(array, expected_min, expected_max, options);
   }

   void AssertMinMaxIs(const std::vector<std::string>& json, c_type expected_min,
                       c_type expected_max, const MinMaxOptions& options) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertMinMaxIs(array, expected_min, expected_max, options);
   }

   void AssertMinMaxIsNull(const Datum& array, const MinMaxOptions& options) {
     ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array, options));
     for (const auto& val : out.scalar_as<StructScalar>().value) {
       ASSERT_FALSE(val->is_valid);
     }
   }

   void AssertMinMaxIsNull(const std::string& json, const MinMaxOptions& options) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertMinMaxIsNull(array, options);
   }

   void AssertMinMaxIsNull(const std::vector<std::string>& json,
                           const MinMaxOptions& options) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertMinMaxIsNull(array, options);
   }

   std::shared_ptr<DataType> type_singleton() { return Traits::type_singleton(); }
 };

 template <typename ArrowType>
 class TestIntegerMinMaxKernel : public TestPrimitiveMinMaxKernel<ArrowType> {};

 template <typename ArrowType>
 class TestFloatingMinMaxKernel : public TestPrimitiveMinMaxKernel<ArrowType> {};

 class TestBooleanMinMaxKernel : public TestPrimitiveMinMaxKernel<BooleanType> {};

 TEST_F(TestBooleanMinMaxKernel, Basics) {
   MinMaxOptions options;
   std::vector<std::string> chunked_input0 = {"[]", "[]"};
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
   std::vector<std::string> chunked_input3 = {"[true, null]", "[null, false]"};

   // SKIP nulls by default
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null, null, null]", options);
   this->AssertMinMaxIs("[false, false, false]", false, false, options);
   this->AssertMinMaxIs("[false, false, false, null]", false, false, options);
   this->AssertMinMaxIs("[true, null, true, true]", true, true, options);
   this->AssertMinMaxIs("[true, null, true, true]", true, true, options);
   this->AssertMinMaxIs("[true, null, false, true]", false, true, options);
   this->AssertMinMaxIsNull(chunked_input0, options);
   this->AssertMinMaxIs(chunked_input1, true, true, options);
   this->AssertMinMaxIs(chunked_input2, false, false, options);
   this->AssertMinMaxIs(chunked_input3, false, true, options);

   options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null, null, null]", options);
   this->AssertMinMaxIsNull("[false, null, false]", options);
   this->AssertMinMaxIsNull("[true, null]", options);
   this->AssertMinMaxIs("[true, true, true]", true, true, options);
   this->AssertMinMaxIs("[false, false]", false, false, options);
   this->AssertMinMaxIs("[false, true]", false, true, options);
   this->AssertMinMaxIsNull(chunked_input0, options);
   this->AssertMinMaxIsNull(chunked_input1, options);
   this->AssertMinMaxIs(chunked_input2, false, false, options);
   this->AssertMinMaxIsNull(chunked_input3, options);
 }

 TYPED_TEST_SUITE(TestIntegerMinMaxKernel, IntegralArrowTypes);
 TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
   MinMaxOptions options;
   std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 3, 4]"};
   std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 3, 4]"};
   std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, null, 3, 4]"};

   // SKIP nulls by default
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null, null, null]", options);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, null, 2, 3, 4]", 2, 5, options);
   this->AssertMinMaxIs(chunked_input1, 1, 9, options);
   this->AssertMinMaxIs(chunked_input2, 1, 9, options);
   this->AssertMinMaxIs(chunked_input3, 1, 9, options);

   options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   // output null
   this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options);
   // output null
   this->AssertMinMaxIsNull(chunked_input1, options);
   this->AssertMinMaxIsNull(chunked_input2, options);
   this->AssertMinMaxIsNull(chunked_input3, options);
 }

 TYPED_TEST_SUITE(TestFloatingMinMaxKernel, RealArrowTypes);
 TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
   MinMaxOptions options;
   std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 3, 4]"};
   std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 3, 4]"};
   std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, null, 3, 4]"};

   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, null, 2, 3, 4]", 2, 5, options);
   this->AssertMinMaxIs("[5, Inf, 2, 3, 4]", 2.0, INFINITY, options);
   this->AssertMinMaxIs("[5, NaN, 2, 3, 4]", 2, 5, options);
   this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
   this->AssertMinMaxIs(chunked_input1, 1, 9, options);
   this->AssertMinMaxIs(chunked_input2, 1, 9, options);
   this->AssertMinMaxIs(chunked_input3, 1, 9, options);

   options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
   // output null
   this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options);
   // output null
   this->AssertMinMaxIsNull("[5, -Inf, null, 3, 4]", options);
   // output null
   this->AssertMinMaxIsNull(chunked_input1, options);
   this->AssertMinMaxIsNull(chunked_input2, options);
   this->AssertMinMaxIsNull(chunked_input3, options);
 }

 TYPED_TEST(TestFloatingMinMaxKernel, DefaultOptions) {
   auto values = ArrayFromJSON(this->type_singleton(), "[0, 1, 2, 3, 4]");

   ASSERT_OK_AND_ASSIGN(auto no_options_provided, CallFunction("min_max", {values}));

   auto default_options = MinMaxOptions::Defaults();
   ASSERT_OK_AND_ASSIGN(auto explicit_defaults,
                        CallFunction("min_max", {values}, &default_options));

   AssertDatumsEqual(explicit_defaults, no_options_provided);
 }

 template <typename ArrowType>
 struct MinMaxResult {
   using T = typename ArrowType::c_type;

   T min = 0;
   T max = 0;
   bool is_valid = false;
 };

 template <typename ArrowType>
 static enable_if_integer<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
     const Array& array) {
   using T = typename ArrowType::c_type;
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;

   MinMaxResult<ArrowType> result;

   const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);
   const auto values = array_numeric.raw_values();

   if (array.length() <= array.null_count()) {  // All null values
     return result;
   }

   T min = std::numeric_limits<T>::max();
   T max = std::numeric_limits<T>::min();
   if (array.null_count() != 0) {  // Some values are null
     BitmapReader reader(array.null_bitmap_data(), array.offset(), array.length());
     for (int64_t i = 0; i < array.length(); i++) {
       if (reader.IsSet()) {
         min = std::min(min, values[i]);
         max = std::max(max, values[i]);
       }
       reader.Next();
     }
   } else {  // All true values
     for (int64_t i = 0; i < array.length(); i++) {
       min = std::min(min, values[i]);
       max = std::max(max, values[i]);
     }
   }

   result.min = min;
   result.max = max;
   result.is_valid = true;
   return result;
 }

 template <typename ArrowType>
 static enable_if_floating_point<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
     const Array& array) {
   using T = typename ArrowType::c_type;
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;

   MinMaxResult<ArrowType> result;

   const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);
   const auto values = array_numeric.raw_values();

   if (array.length() <= array.null_count()) {  // All null values
     return result;
   }

   T min = std::numeric_limits<T>::infinity();
   T max = -std::numeric_limits<T>::infinity();
   if (array.null_count() != 0) {  // Some values are null
     BitmapReader reader(array.null_bitmap_data(), array.offset(), array.length());
     for (int64_t i = 0; i < array.length(); i++) {
       if (reader.IsSet()) {
         min = std::fmin(min, values[i]);
         max = std::fmax(max, values[i]);
       }
       reader.Next();
     }
   } else {  // All true values
     for (int64_t i = 0; i < array.length(); i++) {
       min = std::fmin(min, values[i]);
       max = std::fmax(max, values[i]);
     }
   }

   result.min = min;
   result.max = max;
   result.is_valid = true;
   return result;
 }

 template <typename ArrowType>
 void ValidateMinMax(const Array& array) {
   using Traits = TypeTraits<ArrowType>;
   using ScalarType = typename Traits::ScalarType;

   ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array));
   const StructScalar& value = out.scalar_as<StructScalar>();

   auto expected = NaiveMinMax<ArrowType>(array);
   const auto& out_min = checked_cast<const ScalarType&>(*value.value[0]);
   const auto& out_max = checked_cast<const ScalarType&>(*value.value[1]);

   if (expected.is_valid) {
     ASSERT_TRUE(out_min.is_valid);
     ASSERT_TRUE(out_max.is_valid);
     ASSERT_EQ(expected.min, out_min.value);
     ASSERT_EQ(expected.max, out_max.value);
   } else {  // All null values
     ASSERT_FALSE(out_min.is_valid);
     ASSERT_FALSE(out_max.is_valid);
   }
 }

 template <typename ArrowType>
 class TestRandomNumericMinMaxKernel : public ::testing::Test {};

 TYPED_TEST_SUITE(TestRandomNumericMinMaxKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
   auto rand = random::RandomArrayGenerator(0x8afc055);
   // Test size up to 1<<11 (2048).
   for (size_t i = 3; i < 12; i += 2) {
     for (auto null_probability : {0.0, 0.01, 0.1, 0.5, 0.99, 1.0}) {
       int64_t base_length = (1UL << i) + 2;
       auto array = rand.Numeric<TypeParam>(base_length, 0, 100, null_probability);
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         ValidateMinMax<TypeParam>(*array->Slice(0, length));
       }
     }
   }
 }

 //
 // Any
 //

 class TestPrimitiveAnyKernel : public ::testing::Test {
  public:
   void AssertAnyIs(const Datum& array, bool expected) {
     ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
     const BooleanScalar& out_any = out.scalar_as<BooleanScalar>();
     const auto expected_any = static_cast<const BooleanScalar>(expected);
     ASSERT_EQ(out_any, expected_any);
   }

   void AssertAnyIs(const std::string& json, bool expected) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertAnyIs(array, expected);
   }

   void AssertAnyIs(const std::vector<std::string>& json, bool expected) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertAnyIs(array, expected);
   }

   std::shared_ptr<DataType> type_singleton() {
     return TypeTraits<BooleanType>::type_singleton();
   }
 };

 class TestAnyKernel : public TestPrimitiveAnyKernel {};

 TEST_F(TestAnyKernel, Basics) {
   std::vector<std::string> chunked_input0 = {"[]", "[true]"};
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
   std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
   std::vector<std::string> chunked_input4 = {"[true, null]", "[null, false]"};

   this->AssertAnyIs("[]", false);
   this->AssertAnyIs("[false]", false);
   this->AssertAnyIs("[true, false]", true);
   this->AssertAnyIs("[null, null, null]", false);
   this->AssertAnyIs("[false, false, false]", false);
   this->AssertAnyIs("[false, false, false, null]", false);
   this->AssertAnyIs("[true, null, true, true]", true);
   this->AssertAnyIs("[false, null, false, true]", true);
   this->AssertAnyIs("[true, null, false, true]", true);
   this->AssertAnyIs(chunked_input0, true);
   this->AssertAnyIs(chunked_input1, true);
   this->AssertAnyIs(chunked_input2, false);
   this->AssertAnyIs(chunked_input3, false);
   this->AssertAnyIs(chunked_input4, true);
 }

 //
 // All
 //

 class TestPrimitiveAllKernel : public ::testing::Test {
  public:
   void AssertAllIs(const Datum& array, bool expected) {
     ASSERT_OK_AND_ASSIGN(Datum out, All(array));
     const BooleanScalar& out_all = out.scalar_as<BooleanScalar>();
     const auto expected_all = static_cast<const BooleanScalar>(expected);
     ASSERT_EQ(out_all, expected_all);
   }

   void AssertAllIs(const std::string& json, bool expected) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertAllIs(array, expected);
   }

   void AssertAllIs(const std::vector<std::string>& json, bool expected) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertAllIs(array, expected);
   }

   std::shared_ptr<DataType> type_singleton() {
     return TypeTraits<BooleanType>::type_singleton();
   }
 };

 class TestAllKernel : public TestPrimitiveAllKernel {};

 TEST_F(TestAllKernel, Basics) {
   std::vector<std::string> chunked_input0 = {"[]", "[true]"};
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
   std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
   std::vector<std::string> chunked_input4 = {"[true, null]", "[null, false]"};
   std::vector<std::string> chunked_input5 = {"[false, null]", "[null, true]"};

   this->AssertAllIs("[]", true);
   this->AssertAllIs("[false]", false);
   this->AssertAllIs("[true, false]", false);
   this->AssertAllIs("[null, null, null]", true);
   this->AssertAllIs("[false, false, false]", false);
   this->AssertAllIs("[false, false, false, null]", false);
   this->AssertAllIs("[true, null, true, true]", true);
   this->AssertAllIs("[false, null, false, true]", false);
   this->AssertAllIs("[true, null, false, true]", false);
   this->AssertAllIs(chunked_input0, true);
   this->AssertAllIs(chunked_input1, true);
   this->AssertAllIs(chunked_input2, false);
   this->AssertAllIs(chunked_input3, false);
   this->AssertAllIs(chunked_input4, false);
   this->AssertAllIs(chunked_input5, false);
 }

 //
 // Mode
 //

 template <typename T>
 class TestPrimitiveModeKernel : public ::testing::Test {
  public:
   using ArrowType = T;
   using Traits = TypeTraits<ArrowType>;
   using CType = typename ArrowType::c_type;

   void AssertModesAre(const Datum& array, const int n,
                       const std::vector<CType>& expected_modes,
                       const std::vector<int64_t>& expected_counts) {
     ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
     ASSERT_OK(out.make_array()->ValidateFull());
     const StructArray out_array(out.array());
     ASSERT_EQ(out_array.length(), expected_modes.size());
     ASSERT_EQ(out_array.num_fields(), 2);

     const CType* out_modes = out_array.field(0)->data()->GetValues<CType>(1);
     const int64_t* out_counts = out_array.field(1)->data()->GetValues<int64_t>(1);
     for (int i = 0; i < out_array.length(); ++i) {
       // equal or nan equal
       ASSERT_TRUE(
           (expected_modes[i] == out_modes[i]) ||
           (expected_modes[i] != expected_modes[i] && out_modes[i] != out_modes[i]));
       ASSERT_EQ(expected_counts[i], out_counts[i]);
     }
   }

   void AssertModesAre(const std::string& json, const int n,
                       const std::vector<CType>& expected_modes,
                       const std::vector<int64_t>& expected_counts) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertModesAre(array, n, expected_modes, expected_counts);
   }

   void AssertModeIs(const Datum& array, CType expected_mode, int64_t expected_count) {
     AssertModesAre(array, 1, {expected_mode}, {expected_count});
   }

   void AssertModeIs(const std::string& json, CType expected_mode,
                     int64_t expected_count) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertModeIs(array, expected_mode, expected_count);
   }

   void AssertModeIs(const std::vector<std::string>& json, CType expected_mode,
                     int64_t expected_count) {
     auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
     AssertModeIs(chunked, expected_mode, expected_count);
   }

   void AssertModesEmpty(const Datum& array, int n) {
     ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
     ASSERT_OK(out.make_array()->ValidateFull());
     ASSERT_EQ(out.array()->length, 0);
   }

   void AssertModesEmpty(const std::string& json, int n = 1) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertModesEmpty(array, n);
   }

   void AssertModesEmpty(const std::vector<std::string>& json, int n = 1) {
     auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
     AssertModesEmpty(chunked, n);
   }

   std::shared_ptr<DataType> type_singleton() { return Traits::type_singleton(); }
 };

 template <typename ArrowType>
 class TestIntegerModeKernel : public TestPrimitiveModeKernel<ArrowType> {};

 template <typename ArrowType>
 class TestFloatingModeKernel : public TestPrimitiveModeKernel<ArrowType> {};

 class TestBooleanModeKernel : public TestPrimitiveModeKernel<BooleanType> {};

 class TestInt8ModeKernelValueRange : public TestPrimitiveModeKernel<Int8Type> {};

 class TestInt32ModeKernel : public TestPrimitiveModeKernel<Int32Type> {};

 TEST_F(TestBooleanModeKernel, Basics) {
   this->AssertModeIs("[false, false]", false, 2);
   this->AssertModeIs("[false, false, true, true, true]", true, 3);
   this->AssertModeIs("[true, false, false, true, true]", true, 3);
   this->AssertModeIs("[false, false, true, true, true, false]", false, 3);

   this->AssertModeIs("[true, null, false, false, null, true, null, null, true]", true, 3);
   this->AssertModesEmpty("[null, null, null]");
   this->AssertModesEmpty("[]");

   this->AssertModeIs({"[true, false]", "[true, true]", "[false, false]"}, false, 3);
   this->AssertModeIs({"[true, null]", "[]", "[null, false]"}, false, 1);
   this->AssertModesEmpty({"[null, null]", "[]", "[null]"});

   this->AssertModesAre("[false, false, true, true, true, false]", 2, {false, true},
                        {3, 3});
   this->AssertModesAre("[true, null, false, false, null, true, null, null, true]", 100,
                        {true, false}, {3, 2});
   this->AssertModesEmpty({"[null, null]", "[]", "[null]"}, 4);
 }

 TYPED_TEST_SUITE(TestIntegerModeKernel, IntegralArrowTypes);
 TYPED_TEST(TestIntegerModeKernel, Basics) {
   this->AssertModeIs("[5, 1, 1, 5, 5]", 5, 3);
   this->AssertModeIs("[5, 1, 1, 5, 5, 1]", 1, 3);
   this->AssertModeIs("[127, 0, 127, 127, 0, 1, 0, 127]", 127, 4);

   this->AssertModeIs("[null, null, 2, null, 1]", 1, 1);
   this->AssertModesEmpty("[null, null, null]");
   this->AssertModesEmpty("[]");

   this->AssertModeIs({"[5]", "[1, 1, 5]", "[5]"}, 5, 3);
   this->AssertModeIs({"[5]", "[1, 1, 5]", "[5, 1]"}, 1, 3);
   this->AssertModesEmpty({"[null, null]", "[]", "[null]"});

   this->AssertModesAre("[127, 0, 127, 127, 0, 1, 0, 127]", 2, {127, 0}, {4, 3});
   this->AssertModesAre("[null, null, 2, null, 1]", 3, {1, 2}, {1, 1});
   this->AssertModesEmpty("[null, null, null]", 10);
 }

 TYPED_TEST_SUITE(TestFloatingModeKernel, RealArrowTypes);
 TYPED_TEST(TestFloatingModeKernel, Floats) {
   this->AssertModeIs("[5, 1, 1, 5, 5]", 5, 3);
   this->AssertModeIs("[5, 1, 1, 5, 5, 1]", 1, 3);
   this->AssertModeIs("[Inf, 100, Inf, 100, Inf]", INFINITY, 3);
   this->AssertModeIs("[Inf, -Inf, Inf, -Inf]", -INFINITY, 2);

   this->AssertModeIs("[null, null, 2, null, 1]", 1, 1);
   this->AssertModeIs("[NaN, NaN, 1, null, 1]", 1, 2);

   this->AssertModesEmpty("[null, null, null]");
   this->AssertModesEmpty("[]");

   this->AssertModeIs("[NaN, NaN, 1]", NAN, 2);
   this->AssertModeIs("[NaN, NaN, null]", NAN, 2);
   this->AssertModeIs("[NaN, NaN, NaN]", NAN, 3);

   this->AssertModeIs({"[Inf, 100]", "[Inf, 100]", "[Inf]"}, INFINITY, 3);
   this->AssertModeIs({"[NaN, 1]", "[NaN, 1]", "[NaN]"}, NAN, 3);
   this->AssertModesEmpty({"[null, null]", "[]", "[null]"});

   this->AssertModesAre("[Inf, 100, Inf, 100, Inf]", 2, {INFINITY, 100}, {3, 2});
   this->AssertModesAre("[NaN, NaN, 1, null, 1, 2, 2]", 3, {1, 2, NAN}, {2, 2, 2});
 }

 TEST_F(TestInt8ModeKernelValueRange, Basics) {
   this->AssertModeIs("[0, 127, -128, -128]", -128, 2);
   this->AssertModeIs("[127, 127, 127]", 127, 3);
 }

 template <typename ArrowType>
 struct ModeResult {
   using T = typename ArrowType::c_type;

   T mode = std::numeric_limits<T>::min();
   int64_t count = 0;
 };

 template <typename ArrowType>
 ModeResult<ArrowType> NaiveMode(const Array& array) {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
   using CTYPE = typename ArrowType::c_type;

   std::unordered_map<CTYPE, int64_t> value_counts;

   const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);
   const auto values = array_numeric.raw_values();
   BitmapReader reader(array.null_bitmap_data(), array.offset(), array.length());
   for (int64_t i = 0; i < array.length(); ++i) {
     if (reader.IsSet()) {
       ++value_counts[values[i]];
     }
     reader.Next();
   }

   ModeResult<ArrowType> result;
   for (const auto& value_count : value_counts) {
     auto value = value_count.first;
     auto count = value_count.second;
     if (count > result.count || (count == result.count && value < result.mode)) {
       result.count = count;
       result.mode = value;
     }
   }

   return result;
 }

 template <typename ArrowType, typename CTYPE = typename ArrowType::c_type>
 void CheckModeWithRange(CTYPE range_min, CTYPE range_max) {
   auto rand = random::RandomArrayGenerator(0x5487655);
   // 32K items (>= counting mode cutoff) within range, 10% null
   auto array = rand.Numeric<ArrowType>(32 * 1024, range_min, range_max, 0.1);

   auto expected = NaiveMode<ArrowType>(*array);
   ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
   ASSERT_OK(out.make_array()->ValidateFull());
   const StructArray out_array(out.array());
   ASSERT_EQ(out_array.length(), 1);
   ASSERT_EQ(out_array.num_fields(), 2);

   const CTYPE* out_modes = out_array.field(0)->data()->GetValues<CTYPE>(1);
   const int64_t* out_counts = out_array.field(1)->data()->GetValues<int64_t>(1);
   ASSERT_EQ(out_modes[0], expected.mode);
   ASSERT_EQ(out_counts[0], expected.count);
 }

 TEST_F(TestInt32ModeKernel, SmallValueRange) {
   // Small value range => should exercise counter-based Mode implementation
   CheckModeWithRange<ArrowType>(-100, 100);
 }

 TEST_F(TestInt32ModeKernel, LargeValueRange) {
   // Large value range => should exercise sorter-based Mode implementation
   CheckModeWithRange<ArrowType>(-10000000, 10000000);
 }

 //
 // Variance/Stddev
 //

 template <typename ArrowType>
 class TestPrimitiveVarStdKernel : public ::testing::Test {
  public:
   using Traits = TypeTraits<ArrowType>;
   using ScalarType = typename TypeTraits<DoubleType>::ScalarType;

   void AssertVarStdIs(const Array& array, const VarianceOptions& options,
                       double expected_var) {
     AssertVarStdIsInternal(array, options, expected_var);
   }

   void AssertVarStdIs(const std::shared_ptr<ChunkedArray>& array,
                       const VarianceOptions& options, double expected_var) {
     AssertVarStdIsInternal(array, options, expected_var);
   }

   void AssertVarStdIs(const std::string& json, const VarianceOptions& options,
                       double expected_var) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertVarStdIs(*array, options, expected_var);
   }

   void AssertVarStdIs(const std::vector<std::string>& json,
                       const VarianceOptions& options, double expected_var) {
     auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
     AssertVarStdIs(chunked, options, expected_var);
   }

   void AssertVarStdIsInvalid(const Array& array, const VarianceOptions& options) {
     AssertVarStdIsInvalidInternal(array, options);
   }

   void AssertVarStdIsInvalid(const std::shared_ptr<ChunkedArray>& array,
                              const VarianceOptions& options) {
     AssertVarStdIsInvalidInternal(array, options);
   }

   void AssertVarStdIsInvalid(const std::string& json, const VarianceOptions& options) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertVarStdIsInvalid(*array, options);
   }

   void AssertVarStdIsInvalid(const std::vector<std::string>& json,
                              const VarianceOptions& options) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertVarStdIsInvalid(array, options);
   }

   std::shared_ptr<DataType> type_singleton() { return Traits::type_singleton(); }

  private:
   void AssertVarStdIsInternal(const Datum& array, const VarianceOptions& options,
                               double expected_var) {
     ASSERT_OK_AND_ASSIGN(Datum out_var, Variance(array, options));
     ASSERT_OK_AND_ASSIGN(Datum out_std, Stddev(array, options));
     auto var = checked_cast<const ScalarType*>(out_var.scalar().get());
     auto std = checked_cast<const ScalarType*>(out_std.scalar().get());
     ASSERT_TRUE(var->is_valid && std->is_valid);
     ASSERT_DOUBLE_EQ(std->value * std->value, var->value);
     ASSERT_DOUBLE_EQ(var->value, expected_var);  // < 4ULP
   }

   void AssertVarStdIsInvalidInternal(const Datum& array, const VarianceOptions& options) {
     ASSERT_OK_AND_ASSIGN(Datum out_var, Variance(array, options));
     ASSERT_OK_AND_ASSIGN(Datum out_std, Stddev(array, options));
     auto var = checked_cast<const ScalarType*>(out_var.scalar().get());
     auto std = checked_cast<const ScalarType*>(out_std.scalar().get());
     ASSERT_FALSE(var->is_valid || std->is_valid);
   }
 };

 template <typename ArrowType>
 class TestNumericVarStdKernel : public TestPrimitiveVarStdKernel<ArrowType> {};

 // Reference value from numpy.var
 TYPED_TEST_SUITE(TestNumericVarStdKernel, NumericArrowTypes);
 TYPED_TEST(TestNumericVarStdKernel, Basics) {
   VarianceOptions options;  // ddof = 0, population variance/stddev

   this->AssertVarStdIs("[100]", options, 0);
   this->AssertVarStdIs("[1, 2, 3]", options, 0.6666666666666666);
   this->AssertVarStdIs("[null, 1, 2, null, 3]", options, 0.6666666666666666);

   std::vector<std::string> chunks;
   chunks = {"[]", "[1]", "[2]", "[null]", "[3]"};
   this->AssertVarStdIs(chunks, options, 0.6666666666666666);
   chunks = {"[1, 2, 3]", "[4, 5, 6]", "[7, 8]"};
   this->AssertVarStdIs(chunks, options, 5.25);
   chunks = {"[1, 2, 3, 4, 5, 6, 7]", "[8]"};
   this->AssertVarStdIs(chunks, options, 5.25);

   this->AssertVarStdIsInvalid("[null, null, null]", options);
   this->AssertVarStdIsInvalid("[]", options);
   this->AssertVarStdIsInvalid("[]", options);

   options.ddof = 1;  // sample variance/stddev

   this->AssertVarStdIs("[1, 2]", options, 0.5);

   chunks = {"[1]", "[2]"};
   this->AssertVarStdIs(chunks, options, 0.5);
   chunks = {"[1, 2, 3]", "[4, 5, 6]", "[7, 8]"};
   this->AssertVarStdIs(chunks, options, 6.0);
   chunks = {"[1, 2, 3, 4, 5, 6, 7]", "[8]"};
   this->AssertVarStdIs(chunks, options, 6.0);

   this->AssertVarStdIsInvalid("[100]", options);
   this->AssertVarStdIsInvalid("[100, null, null]", options);
   chunks = {"[100]", "[null]", "[]"};
   this->AssertVarStdIsInvalid(chunks, options);
 }

 // Test numerical stability
 template <typename ArrowType>
 class TestVarStdKernelStability : public TestPrimitiveVarStdKernel<ArrowType> {};

 typedef ::testing::Types<Int32Type, UInt32Type, Int64Type, UInt64Type, DoubleType>
     VarStdStabilityTypes;

 TYPED_TEST_SUITE(TestVarStdKernelStability, VarStdStabilityTypes);
 TYPED_TEST(TestVarStdKernelStability, Basics) {
   VarianceOptions options{1};  // ddof = 1
   this->AssertVarStdIs("[100000004, 100000007, 100000013, 100000016]", options, 30.0);
   this->AssertVarStdIs("[1000000004, 1000000007, 1000000013, 1000000016]", options, 30.0);
   if (!is_unsigned_integer_type<TypeParam>::value) {
     this->AssertVarStdIs("[-1000000016, -1000000013, -1000000007, -1000000004]", options,
                          30.0);
   }
 }

 // Test numerical stability of variance merging code
 class TestVarStdKernelMergeStability : public TestPrimitiveVarStdKernel<DoubleType> {};

 TEST_F(TestVarStdKernelMergeStability, Basics) {
   VarianceOptions options{1};  // ddof = 1

 #ifndef __MINGW32__  // MinGW has precision issues
   // XXX: The reference value from numpy is actually wrong due to floating
   // point limits. The correct result should equals variance(90, 0) = 4050.
   std::vector<std::string> chunks = {"[40000008000000490]", "[40000008000000400]"};
   this->AssertVarStdIs(chunks, options, 3904.0);
 #endif
 }

 // Test round-off error
 template <typename ArrowType>
 class TestVarStdKernelRoundOff : public TestPrimitiveVarStdKernel<ArrowType> {};

 typedef ::testing::Types<Int32Type, Int64Type, FloatType, DoubleType> VarStdRoundOffTypes;

 TYPED_TEST_SUITE(TestVarStdKernelRoundOff, VarStdRoundOffTypes);
 TYPED_TEST(TestVarStdKernelRoundOff, Basics) {
   // build array: np.arange(321000, dtype='xxx')
   typename TypeParam::c_type value = 0;
   ASSERT_OK_AND_ASSIGN(
       auto array, ArrayFromBuilderVisitor(TypeTraits<TypeParam>::type_singleton(), 321000,
                                           [&](NumericBuilder<TypeParam>* builder) {
                                             builder->UnsafeAppend(value++);
                                           }));

   // reference value from numpy.var()
   this->AssertVarStdIs(*array, VarianceOptions{0}, 8586749999.916667);
 }

 // Test integer arithmetic code
 class TestVarStdKernelInt32 : public TestPrimitiveVarStdKernel<Int32Type> {};

 TEST_F(TestVarStdKernelInt32, Basics) {
   VarianceOptions options{1};
   this->AssertVarStdIs("[-2147483648, -2147483647, -2147483646]", options, 1.0);
   this->AssertVarStdIs("[2147483645, 2147483646, 2147483647]", options, 1.0);
   this->AssertVarStdIs("[-2147483648, -2147483648, 2147483647]", options,
                        6.148914688373205e+18);
 }

 class TestVarStdKernelUInt32 : public TestPrimitiveVarStdKernel<UInt32Type> {};

 TEST_F(TestVarStdKernelUInt32, Basics) {
   VarianceOptions options{1};
   this->AssertVarStdIs("[4294967293, 4294967294, 4294967295]", options, 1.0);
   this->AssertVarStdIs("[0, 0, 4294967295]", options, 6.148914688373205e+18);
 }

 // https://en.wikipedia.org/wiki/Kahan_summation_algorithm
 void KahanSum(double& sum, double& adjust, double addend) {
   double y = addend - adjust;
   double t = sum + y;
   adjust = (t - sum) - y;
   sum = t;
 }

 // Calculate reference variance with Welford's online algorithm + Kahan summation
 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
 // XXX: not stable for long array with very small `stddev / average`
 template <typename ArrayType>
 std::pair<double, double> WelfordVar(const ArrayType& array) {
   const auto values = array.raw_values();
   BitmapReader reader(array.null_bitmap_data(), array.offset(), array.length());
   double count = 0, mean = 0, m2 = 0;
   double mean_adjust = 0, m2_adjust = 0;
   for (int64_t i = 0; i < array.length(); ++i) {
     if (reader.IsSet()) {
       ++count;
       double delta = static_cast<double>(values[i]) - mean;
       KahanSum(mean, mean_adjust, delta / count);
       double delta2 = static_cast<double>(values[i]) - mean;
       KahanSum(m2, m2_adjust, delta * delta2);
     }
     reader.Next();
   }
   return std::make_pair(m2 / count, m2 / (count - 1));
 }

 // Test random chunked array
 template <typename ArrowType>
 class TestVarStdKernelRandom : public TestPrimitiveVarStdKernel<ArrowType> {};

 typedef ::testing::Types<Int32Type, UInt32Type, Int64Type, UInt64Type, FloatType,
                          DoubleType>
     VarStdRandomTypes;

 TYPED_TEST_SUITE(TestVarStdKernelRandom, VarStdRandomTypes);
 TYPED_TEST(TestVarStdKernelRandom, Basics) {
   // Cut array into small chunks
   constexpr int array_size = 5000;
   constexpr int chunk_size_max = 50;
   constexpr int chunk_count = array_size / chunk_size_max;

   std::shared_ptr<Array> array;
   auto rand = random::RandomArrayGenerator(0x5487656);
   if (is_floating_type<TypeParam>::value) {
     array = rand.Numeric<TypeParam>(array_size, -10000.0, 100000.0, 0.1);
   } else {
     using CType = typename TypeParam::c_type;
     constexpr CType min = std::numeric_limits<CType>::min();
     constexpr CType max = std::numeric_limits<CType>::max();
     array = rand.Numeric<TypeParam>(array_size, min, max, 0.1);
   }
   auto chunk_size_array = rand.Numeric<Int32Type>(chunk_count, 0, chunk_size_max);
   const int* chunk_size = chunk_size_array->data()->GetValues<int>(1);
   int total_size = 0;

   ArrayVector array_vector;
   for (int i = 0; i < chunk_count; ++i) {
     array_vector.emplace_back(array->Slice(total_size, chunk_size[i]));
     total_size += chunk_size[i];
   }
   auto chunked = *ChunkedArray::Make(array_vector);

   double var_population, var_sample;
   using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
   auto typed_array = checked_pointer_cast<ArrayType>(array->Slice(0, total_size));
   std::tie(var_population, var_sample) = WelfordVar(*typed_array);

   this->AssertVarStdIs(chunked, VarianceOptions{0}, var_population);
   this->AssertVarStdIs(chunked, VarianceOptions{1}, var_sample);
 }

 // This test is too heavy to run in CI, should be checked manually
 #if 0
 class TestVarStdKernelIntegerLength : public TestPrimitiveVarStdKernel<Int32Type> {};

 TEST_F(TestVarStdKernelIntegerLength, Basics) {
   constexpr int32_t min = std::numeric_limits<int32_t>::min();
   constexpr int32_t max = std::numeric_limits<int32_t>::max();
   auto rand = random::RandomArrayGenerator(0x5487657);
   // large data volume
   auto array = rand.Numeric<Int32Type>(4000000000, min, max, 0.1);
   // biased distribution
   // auto array = rand.Numeric<Int32Type>(4000000000, min, min + 100000, 0.1);

   double var_population, var_sample;
   auto int32_array = checked_pointer_cast<Int32Array>(array);
   std::tie(var_population, var_sample) = WelfordVar(*int32_array);

   this->AssertVarStdIs(*array, VarianceOptions{0}, var_population);
   this->AssertVarStdIs(*array, VarianceOptions{1}, var_sample);
 }
 #endif

 //
 // Quantile
 //

 template <typename ArrowType>
 class TestPrimitiveQuantileKernel : public ::testing::Test {
  public:
   using Traits = TypeTraits<ArrowType>;
   using CType = typename ArrowType::c_type;

   void AssertQuantilesAre(const Datum& array, QuantileOptions options,
                           const std::vector<std::vector<Datum>>& expected) {
     ASSERT_EQ(options.q.size(), expected.size());

     for (size_t i = 0; i < this->interpolations_.size(); ++i) {
       options.interpolation = this->interpolations_[i];

       ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options));
       const auto& out_array = out.make_array();
       ASSERT_OK(out_array->ValidateFull());
       ASSERT_EQ(out_array->length(), options.q.size());
       ASSERT_EQ(out_array->null_count(), 0);
       AssertTypeEqual(out_array->type(), expected[0][i].type());

       if (out_array->type()->Equals(float64())) {
         const double* quantiles = out_array->data()->GetValues<double>(1);
         for (int64_t j = 0; j < out_array->length(); ++j) {
           const auto& numeric_scalar =
               checked_pointer_cast<DoubleScalar>(expected[j][i].scalar());
           ASSERT_TRUE((quantiles[j] == numeric_scalar->value) ||
                       (std::isnan(quantiles[j]) && std::isnan(numeric_scalar->value)));
         }
       } else {
         AssertTypeEqual(out_array->type(), type_singleton());
         const CType* quantiles = out_array->data()->GetValues<CType>(1);
         for (int64_t j = 0; j < out_array->length(); ++j) {
           const auto& numeric_scalar =
               checked_pointer_cast<NumericScalar<ArrowType>>(expected[j][i].scalar());
           ASSERT_EQ(quantiles[j], numeric_scalar->value);
         }
       }
     }
   }

   void AssertQuantilesAre(const std::string& json, const std::vector<double>& q,
                           const std::vector<std::vector<Datum>>& expected) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertQuantilesAre(array, QuantileOptions{q}, expected);
   }

   void AssertQuantilesAre(const std::vector<std::string>& json,
                           const std::vector<double>& q,
                           const std::vector<std::vector<Datum>>& expected) {
     auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
     AssertQuantilesAre(chunked, QuantileOptions{q}, expected);
   }

   void AssertQuantileIs(const Datum& array, double q,
                         const std::vector<Datum>& expected) {
     AssertQuantilesAre(array, QuantileOptions{q}, {expected});
   }

   void AssertQuantileIs(const std::string& json, double q,
                         const std::vector<Datum>& expected) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertQuantileIs(array, q, expected);
   }

   void AssertQuantileIs(const std::vector<std::string>& json, double q,
                         const std::vector<Datum>& expected) {
     auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
     AssertQuantileIs(chunked, q, expected);
   }

   void AssertQuantilesEmpty(const Datum& array, const std::vector<double>& q) {
     QuantileOptions options{q};
     for (auto interpolation : this->interpolations_) {
       options.interpolation = interpolation;
       ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options));
       ASSERT_OK(out.make_array()->ValidateFull());
       ASSERT_EQ(out.array()->length, 0);
     }
   }

   void AssertQuantilesEmpty(const std::string& json, const std::vector<double>& q) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertQuantilesEmpty(array, q);
   }

   void AssertQuantilesEmpty(const std::vector<std::string>& json,
                             const std::vector<double>& q) {
     auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
     AssertQuantilesEmpty(chunked, q);
   }

   std::shared_ptr<DataType> type_singleton() { return Traits::type_singleton(); }

   std::vector<enum QuantileOptions::Interpolation> interpolations_ = {
       QuantileOptions::LINEAR, QuantileOptions::LOWER, QuantileOptions::HIGHER,
       QuantileOptions::NEAREST, QuantileOptions::MIDPOINT};
 };

 #define INTYPE(x) Datum(static_cast<typename TypeParam::c_type>(x))
 #define DOUBLE(x) Datum(static_cast<double>(x))
 // output type per interplation: linear, lower, higher, nearest, midpoint
 #define O(a, b, c, d, e) \
   { DOUBLE(a), INTYPE(b), INTYPE(c), INTYPE(d), DOUBLE(e) }

 template <typename ArrowType>
 class TestIntegerQuantileKernel : public TestPrimitiveQuantileKernel<ArrowType> {};

 TYPED_TEST_SUITE(TestIntegerQuantileKernel, IntegralArrowTypes);
 TYPED_TEST(TestIntegerQuantileKernel, Basics) {
   // reference values from numpy
   // ordered by interpolation method: {linear, lower, higher, nearest, midpoint}
   this->AssertQuantileIs("[1]", 0.1, O(1, 1, 1, 1, 1));
   this->AssertQuantileIs("[1, 2]", 0.5, O(1.5, 1, 2, 1, 1.5));
   this->AssertQuantileIs("[3, 5, 2, 9, 0, 1, 8]", 0.5, O(3, 3, 3, 3, 3));
   this->AssertQuantileIs("[3, 5, 2, 9, 0, 1, 8]", 0.33, O(1.98, 1, 2, 2, 1.5));
   this->AssertQuantileIs("[3, 5, 2, 9, 0, 1, 8]", 0.9, O(8.4, 8, 9, 8, 8.5));
   this->AssertQuantilesAre("[3, 5, 2, 9, 0, 1, 8]", {0.5, 0.9},
                            {O(3, 3, 3, 3, 3), O(8.4, 8, 9, 8, 8.5)});
   this->AssertQuantilesAre("[3, 5, 2, 9, 0, 1, 8]", {1, 0.5},
                            {O(9, 9, 9, 9, 9), O(3, 3, 3, 3, 3)});
   this->AssertQuantileIs("[3, 5, 2, 9, 0, 1, 8]", 0, O(0, 0, 0, 0, 0));
   this->AssertQuantileIs("[3, 5, 2, 9, 0, 1, 8]", 1, O(9, 9, 9, 9, 9));

   this->AssertQuantileIs("[5, null, null, 3, 9, null, 8, 1, 2, 0]", 0.21,
                          O(1.26, 1, 2, 1, 1.5));
   this->AssertQuantilesAre("[5, null, null, 3, 9, null, 8, 1, 2, 0]", {0.5, 0.9},
                            {O(3, 3, 3, 3, 3), O(8.4, 8, 9, 8, 8.5)});
   this->AssertQuantilesAre("[5, null, null, 3, 9, null, 8, 1, 2, 0]", {0.9, 0.5},
                            {O(8.4, 8, 9, 8, 8.5), O(3, 3, 3, 3, 3)});

   this->AssertQuantileIs({"[5]", "[null, null]", "[3, 9, null]", "[8, 1, 2, 0]"}, 0.33,
                          O(1.98, 1, 2, 2, 1.5));
   this->AssertQuantilesAre({"[5]", "[null, null]", "[3, 9, null]", "[8, 1, 2, 0]"},
                            {0.21, 1}, {O(1.26, 1, 2, 1, 1.5), O(9, 9, 9, 9, 9)});

   this->AssertQuantilesEmpty("[]", {0.5});
   this->AssertQuantilesEmpty("[null, null, null]", {0.1, 0.2});
   this->AssertQuantilesEmpty({"[null, null]", "[]", "[null]"}, {0.3, 0.4});
 }

 template <typename ArrowType>
 class TestFloatingQuantileKernel : public TestPrimitiveQuantileKernel<ArrowType> {};

 #ifndef __MINGW32__
 TYPED_TEST_SUITE(TestFloatingQuantileKernel, RealArrowTypes);
 TYPED_TEST(TestFloatingQuantileKernel, Floats) {
   // ordered by interpolation method: {linear, lower, higher, nearest, midpoint}
   this->AssertQuantileIs("[-9, 7, Inf, -Inf, 2, 11]", 0.5, O(4.5, 2, 7, 2, 4.5));
   this->AssertQuantileIs("[-9, 7, Inf, -Inf, 2, 11]", 0.1,
                          O(-INFINITY, -INFINITY, -9, -INFINITY, -INFINITY));
   this->AssertQuantileIs("[-9, 7, Inf, -Inf, 2, 11]", 0.9,
                          O(INFINITY, 11, INFINITY, 11, INFINITY));
   this->AssertQuantilesAre("[-9, 7, Inf, -Inf, 2, 11]", {0.3, 0.6},
                            {O(-3.5, -9, 2, 2, -3.5), O(7, 7, 7, 7, 7)});
   this->AssertQuantileIs("[-Inf, Inf]", 0.2, O(NAN, -INFINITY, INFINITY, -INFINITY, NAN));

   this->AssertQuantileIs("[NaN, -9, 7, Inf, null, null, -Inf, NaN, 2, 11]", 0.5,
                          O(4.5, 2, 7, 2, 4.5));
   this->AssertQuantilesAre("[null, -9, 7, Inf, NaN, NaN, -Inf, null, 2, 11]", {0.3, 0.6},
                            {O(-3.5, -9, 2, 2, -3.5), O(7, 7, 7, 7, 7)});
   this->AssertQuantilesAre("[null, -9, 7, Inf, NaN, NaN, -Inf, null, 2, 11]", {0.6, 0.3},
                            {O(7, 7, 7, 7, 7), O(-3.5, -9, 2, 2, -3.5)});

   this->AssertQuantileIs({"[NaN, -9, 7, Inf]", "[null, NaN]", "[-Inf, NaN, 2, 11]"}, 0.5,
                          O(4.5, 2, 7, 2, 4.5));
   this->AssertQuantilesAre({"[null, -9, 7, Inf]", "[NaN, NaN]", "[-Inf, null, 2, 11]"},
                            {0.3, 0.6}, {O(-3.5, -9, 2, 2, -3.5), O(7, 7, 7, 7, 7)});

   this->AssertQuantilesEmpty("[]", {0.5, 0.6});
   this->AssertQuantilesEmpty("[null, NaN, null]", {0.1});
   this->AssertQuantilesEmpty({"[NaN, NaN]", "[]", "[null]"}, {0.3, 0.4});
 }

 class TestInt8QuantileKernel : public TestPrimitiveQuantileKernel<Int8Type> {};

 // Test histogram approach
 TEST_F(TestInt8QuantileKernel, Int8) {
   using TypeParam = Int8Type;
   this->AssertQuantilesAre(
       "[127, -128, null, -128, 66, -88, 127]", {0, 0.3, 0.7, 1},
       {O(-128, -128, -128, -128, -128), O(-108, -128, -88, -88, -108),
        O(96.5, 66, 127, 127, 96.5), O(127, 127, 127, 127, 127)});
   this->AssertQuantilesAre(
       {"[null]", "[-88, 127]", "[]", "[66, -128, null, -128]", "[127]"}, {0, 0.3, 0.7, 1},
       {O(-128, -128, -128, -128, -128), O(-108, -128, -88, -88, -108),
        O(96.5, 66, 127, 127, 96.5), O(127, 127, 127, 127, 127)});
 }
 #endif

 class TestInt64QuantileKernel : public TestPrimitiveQuantileKernel<Int64Type> {};

 // Test big int64 numbers cannot be precisely presented by double
 TEST_F(TestInt64QuantileKernel, Int64) {
   using TypeParam = Int64Type;
   this->AssertQuantileIs(
       "[9223372036854775806, 9223372036854775807]", 0.5,
       O(9.223372036854776e+18, 9223372036854775806, 9223372036854775807,
         9223372036854775806, 9.223372036854776e+18));
 }

 #undef INTYPE
 #undef DOUBLE
 #undef O

 #ifndef __MINGW32__
 class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType> {
  public:
   void CheckQuantiles(int64_t array_size, int64_t num_quantiles) {
     std::shared_ptr<Array> array;
     std::vector<double> quantiles;
     // small value range to exercise input array with equal values and histogram approach
     GenerateTestData(array_size, num_quantiles, -100, 200, &array, &quantiles);

     this->AssertQuantilesAre(array, QuantileOptions{quantiles},
                              NaiveQuantile(*array, quantiles, interpolations_));
   }

   void CheckTDigests(const std::vector<int>& chunk_sizes, int64_t num_quantiles) {
     int total_size = 0;
     for (int size : chunk_sizes) {
       total_size += size;
     }
     std::shared_ptr<Array> array;
     std::vector<double> quantiles;
     GenerateTestData(total_size, num_quantiles, 100, 123456789, &array, &quantiles);

     total_size = 0;
     ArrayVector array_vector;
     for (int size : chunk_sizes) {
       array_vector.emplace_back(array->Slice(total_size, size));
       total_size += size;
     }
     auto chunked = *ChunkedArray::Make(array_vector);

     TDigestOptions options(quantiles);
     ASSERT_OK_AND_ASSIGN(Datum out, TDigest(chunked, options));
     const auto& out_array = out.make_array();
     ASSERT_OK(out_array->ValidateFull());
     ASSERT_EQ(out_array->length(), quantiles.size());
     ASSERT_EQ(out_array->null_count(), 0);
     AssertTypeEqual(out_array->type(), float64());

     // linear interpolated exact quantile as reference
     std::vector<std::vector<Datum>> exact =
         NaiveQuantile(*array, quantiles, {QuantileOptions::LINEAR});
     const double* approx = out_array->data()->GetValues<double>(1);
     for (size_t i = 0; i < quantiles.size(); ++i) {
       const auto& exact_scalar = checked_pointer_cast<DoubleScalar>(exact[i][0].scalar());
       const double tolerance = std::fabs(exact_scalar->value) * 0.05;
       EXPECT_NEAR(approx[i], exact_scalar->value, tolerance) << quantiles[i];
     }
   }

  private:
   void GenerateTestData(int64_t array_size, int64_t num_quantiles, int min, int max,
                         std::shared_ptr<Array>* array, std::vector<double>* quantiles) {
     auto rand = random::RandomArrayGenerator(0x5487658);
     *array = rand.Float64(array_size, min, max, /*null_prob=*/0.1, /*nan_prob=*/0.2);

     random_real(num_quantiles, 0x5487658, 0.0, 1.0, quantiles);
     // make sure to exercise 0 and 1 quantiles
     *std::min_element(quantiles->begin(), quantiles->end()) = 0;
     *std::max_element(quantiles->begin(), quantiles->end()) = 1;
   }

   std::vector<std::vector<Datum>> NaiveQuantile(
       const Array& array, const std::vector<double>& quantiles,
       const std::vector<enum QuantileOptions::Interpolation>& interpolations) {
     // copy and sort input array
     std::vector<double> input(array.length() - array.null_count());
     const double* values = array.data()->GetValues<double>(1);
     const auto bitmap = array.null_bitmap_data();
     int64_t index = 0;
     for (int64_t i = 0; i < array.length(); ++i) {
       if (BitUtil::GetBit(bitmap, i) && !std::isnan(values[i])) {
         input[index++] = values[i];
       }
     }
     input.resize(index);
     std::sort(input.begin(), input.end());

     std::vector<std::vector<Datum>> output(quantiles.size(),
                                            std::vector<Datum>(interpolations.size()));
     for (uint64_t i = 0; i < interpolations.size(); ++i) {
       const auto interp = interpolations[i];
       for (uint64_t j = 0; j < quantiles.size(); ++j) {
         output[j][i] = GetQuantile(input, quantiles[j], interp);
       }
     }
     return output;
   }

   Datum GetQuantile(const std::vector<double>& input, double q,
                     enum QuantileOptions::Interpolation interp) {
     const double index = (input.size() - 1) * q;
     const uint64_t lower_index = static_cast<uint64_t>(index);
     const double fraction = index - lower_index;

     switch (interp) {
       case QuantileOptions::LOWER:
         return Datum(input[lower_index]);
       case QuantileOptions::HIGHER:
         return Datum(input[lower_index + (fraction != 0)]);
       case QuantileOptions::NEAREST:
         if (fraction < 0.5) {
           return Datum(input[lower_index]);
         } else if (fraction > 0.5) {
           return Datum(input[lower_index + 1]);
         } else {
           return Datum(input[lower_index + (lower_index & 1)]);
         }
       case QuantileOptions::LINEAR:
         if (fraction == 0) {
           return Datum(input[lower_index]);
         } else {
           return Datum(fraction * input[lower_index + 1] +
                        (1 - fraction) * input[lower_index]);
         }
       case QuantileOptions::MIDPOINT:
         if (fraction == 0) {
           return Datum(input[lower_index]);
         } else {
           return Datum(input[lower_index] / 2.0 + input[lower_index + 1] / 2.0);
         }
       default:
         return Datum(NAN);
     }
   }
 };

 TEST_F(TestRandomQuantileKernel, Normal) {
   // exercise copy and sort approach: size < 65536
   this->CheckQuantiles(/*array_size=*/10000, /*num_quantiles=*/100);
 }

 TEST_F(TestRandomQuantileKernel, Overlapped) {
   // much more quantiles than array size => many overlaps
   this->CheckQuantiles(/*array_size=*/999, /*num_quantiles=*/9999);
 }

 TEST_F(TestRandomQuantileKernel, Histogram) {
   // exercise histogram approach: size >= 65536, range <= 65536
   this->CheckQuantiles(/*array_size=*/80000, /*num_quantiles=*/100);
 }

 TEST_F(TestRandomQuantileKernel, TDigest) {
   this->CheckTDigests(/*chunk_sizes=*/{12345, 6789, 8765, 4321}, /*num_quantiles=*/100);
 }
 #endif

 class TestTDigestKernel : public ::testing::Test {};

 TEST_F(TestTDigestKernel, AllNullsOrNaNs) {
   const std::vector<std::vector<std::string>> tests = {
       {"[]"},
       {"[null, null]", "[]", "[null]"},
       {"[NaN]", "[NaN, NaN]", "[]"},
       {"[null, NaN, null]"},
       {"[NaN, NaN]", "[]", "[null]"},
   };

   for (const auto& json : tests) {
     auto chunked = ChunkedArrayFromJSON(float64(), json);
     ASSERT_OK_AND_ASSIGN(Datum out, TDigest(chunked, TDigestOptions()));
     ASSERT_OK(out.make_array()->ValidateFull());
     ASSERT_EQ(out.array()->length, 0);
   }
 }

 }  // namespace compute
 }  // namespace arrow