blob: f6a2de292c5e210d15691162263e39189da74eae [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <string>
#include "function_test_util.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_hll.h"
#include "vec/data_types/data_type_number.h"
namespace doris::vectorized {
// Exercises hll_cardinality against single-value, multi-value, repeated-value,
// empty and NULL HyperLogLog inputs.
TEST(function_hll_test, function_hll_cardinality_test) {
    std::string fn_name = "hll_cardinality";
    InputTypeSet arg_types = {PrimitiveType::TYPE_HLL};

    // Every fixture hashes its text with the same murmur seed.
    const auto murmur = [](const std::string& text) -> uint64_t {
        return HashUtil::murmur_hash64A(text.data(), text.size(), HashUtil::MURMUR_SEED);
    };
    const uint64_t word_hash = murmur("test");
    const uint64_t blank_hash = murmur(" ");

    // Sketches built directly from one hash value each.
    HyperLogLog word_hll(word_hash);
    HyperLogLog blank_hll(blank_hash);

    // NOTE(review): the two sketches below are expected to report one element more
    // than the number of distinct hashes passed to update() (3 and 2). Presumably
    // constructing from HLL_DATA_EXPLICIT already inserts an element (e.g. the
    // enumerator being consumed as a hash value) -- confirm against the
    // HyperLogLog constructors.
    HyperLogLog two_updates_hll(HLL_DATA_EXPLICIT);
    two_updates_hll.update(word_hash);
    two_updates_hll.update(blank_hash);

    // Updating twice with the same hash must count the same as updating once;
    // the expected cardinality for this sketch is 2.
    HyperLogLog repeated_update_hll(HLL_DATA_EXPLICIT);
    repeated_update_hll.update(word_hash);
    repeated_update_hll.update(word_hash);

    HyperLogLog no_values_hll;

    // Each row is {input columns, expected cardinality}; a NULL input yields 0.
    DataSet cases = {
            {{&word_hll}, static_cast<int64_t>(1)},
            {{&blank_hll}, static_cast<int64_t>(1)},
            {{&two_updates_hll}, static_cast<int64_t>(3)},
            {{&repeated_update_hll}, static_cast<int64_t>(2)},
            {{&no_values_hll}, static_cast<int64_t>(0)},
            {{Null()}, static_cast<int64_t>(0)},
    };

    static_cast<void>(check_function<DataTypeInt64>(fn_name, arg_types, cases));
}
// Exercises hll_to_base64: each HyperLogLog input must serialize to the listed
// base64 string, and a NULL input must produce NULL.
TEST(function_hll_test, function_hll_to_base64_test) {
    std::string fn_name = "hll_to_base64";
    InputTypeSet arg_types = {PrimitiveType::TYPE_HLL};

    // Every fixture hashes its text with the same murmur seed.
    const auto murmur = [](const std::string& text) -> uint64_t {
        return HashUtil::murmur_hash64A(text.data(), text.size(), HashUtil::MURMUR_SEED);
    };
    const uint64_t word_hash = murmur("test");
    const uint64_t blank_hash = murmur(" ");

    // Sketches built directly from one hash value each.
    HyperLogLog word_hll(word_hash);
    HyperLogLog blank_hll(blank_hash);

    // Sketch holding both hash values.
    HyperLogLog both_hll;
    both_hll.update(word_hash);
    both_hll.update(blank_hash);

    // The first hash is applied a second time here; the expected encoding is
    // nevertheless the same as for both_hll.
    HyperLogLog both_with_repeat_hll;
    both_with_repeat_hll.update(word_hash);
    both_with_repeat_hll.update(blank_hash);
    both_with_repeat_hll.update(word_hash);

    HyperLogLog no_values_hll;

    // Each row is {input columns, expected base64 text}.
    DataSet cases = {
            {{&word_hll}, std::string("AQHm5IIJCx0h/w==")},
            {{&blank_hll}, std::string("AQG/Hk98sO59Sw==")},
            {{&both_hll}, std::string("AQLm5IIJCx0h/78eT3yw7n1L")},
            {{&both_with_repeat_hll}, std::string("AQLm5IIJCx0h/78eT3yw7n1L")},
            {{&no_values_hll}, std::string("AA==")},
            {{Null()}, Null()},
    };

    static_cast<void>(check_function<DataTypeString, true>(fn_name, arg_types, cases));
}
// Exercises hll_from_base64: each base64 string must decode to a HyperLogLog
// matching the sketch built locally from the corresponding hash values.
TEST(function_hll_test, function_hll_from_base64_test) {
    std::string fn_name = "hll_from_base64";
    InputTypeSet arg_types = {PrimitiveType::TYPE_VARCHAR};

    // Every fixture hashes its text with the same murmur seed.
    const auto murmur = [](const std::string& text) -> uint64_t {
        return HashUtil::murmur_hash64A(text.data(), text.size(), HashUtil::MURMUR_SEED);
    };
    const uint64_t word_hash = murmur("test");
    const uint64_t blank_hash = murmur(" ");

    // Encoded inputs paired with single-value sketches.
    const std::string word_b64 = "AQHm5IIJCx0h/w==";
    HyperLogLog word_hll(word_hash);

    const std::string blank_b64 = "AQG/Hk98sO59Sw==";
    HyperLogLog blank_hll(blank_hash);

    // Encoded input paired with a sketch holding both hash values.
    const std::string both_b64 = "AQLm5IIJCx0h/78eT3yw7n1L";
    HyperLogLog both_hll;
    both_hll.update(word_hash);
    both_hll.update(blank_hash);

    // Same encoded text as above; the expected sketch has the first hash applied
    // a second time, which should leave it equal to applying it once.
    const std::string both_again_b64 = both_b64;
    HyperLogLog both_with_repeat_hll;
    both_with_repeat_hll.update(word_hash);
    both_with_repeat_hll.update(blank_hash);
    both_with_repeat_hll.update(word_hash);

    // Encoded input paired with a default-constructed sketch.
    const std::string no_values_b64 = "AA==";
    HyperLogLog no_values_hll;

    // Each row is {input columns, expected HyperLogLog}.
    DataSet cases = {
            {{word_b64}, &word_hll},
            {{blank_b64}, &blank_hll},
            {{both_b64}, &both_hll},
            {{both_again_b64}, &both_with_repeat_hll},
            {{no_values_b64}, &no_values_hll},
    };

    static_cast<void>(check_function<DataTypeHLL, true>(fn_name, arg_types, cases));
}
} // namespace doris::vectorized