blob: 5f1572e1b87a66f7cdc6e71438434813e06c663a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "olap/hll.h"
#include <gtest/gtest.h>
#include "util/hash_util.hpp"
#include "util/slice.h"
namespace doris {
class TestHll : public testing::Test {
public:
virtual ~TestHll() {}
};
static uint64_t hash(uint64_t value) {
return HashUtil::murmur_hash64A(&value, 8, 0);
}
// keep logic same with java version in fe when you change hll_test.cpp,see HllTest.java
TEST_F(TestHll, Normal) {
uint8_t buf[HLL_REGISTERS_COUNT + 1];
// empty
{
Slice str((char*)buf, 0);
ASSERT_FALSE(HyperLogLog::is_valid(str));
}
// check unknown type
{
buf[0] = 60;
Slice str((char*)buf, 1);
ASSERT_FALSE(HyperLogLog::is_valid(str));
}
// empty
{
HyperLogLog empty_hll;
int len = empty_hll.serialize(buf);
ASSERT_EQ(1, len);
HyperLogLog test_hll(Slice((char*)buf, len));
ASSERT_EQ(0, test_hll.estimate_cardinality());
// check serialize
{
Slice str((char*)buf, len);
ASSERT_TRUE(HyperLogLog::is_valid(str));
}
{
Slice str((char*)buf, len + 1);
ASSERT_FALSE(HyperLogLog::is_valid(str));
}
}
// explicit [0. 100)
HyperLogLog explicit_hll;
{
for (int i = 0; i < 100; ++i) {
explicit_hll.update(hash(i));
}
int len = explicit_hll.serialize(buf);
ASSERT_EQ(1 + 1 + 100 * 8, len);
// check serialize
{
Slice str((char*)buf, len);
ASSERT_TRUE(HyperLogLog::is_valid(str));
}
{
Slice str((char*)buf, 1);
ASSERT_FALSE(HyperLogLog::is_valid(str));
}
HyperLogLog test_hll(Slice((char*)buf, len));
test_hll.update(hash(0));
{
HyperLogLog other_hll;
for (int i = 0; i < 100; ++i) {
other_hll.update(hash(i));
}
test_hll.merge(other_hll);
}
ASSERT_EQ(100, test_hll.estimate_cardinality());
}
// sparse [1024, 2048)
HyperLogLog sparse_hll;
{
for (int i = 0; i < 1024; ++i) {
sparse_hll.update(hash(i + 1024));
}
int len = sparse_hll.serialize(buf);
ASSERT_TRUE(len < HLL_REGISTERS_COUNT + 1);
// check serialize
{
Slice str((char*)buf, len);
ASSERT_TRUE(HyperLogLog::is_valid(str));
}
{
Slice str((char*)buf, 1 + 3);
ASSERT_FALSE(HyperLogLog::is_valid(str));
}
HyperLogLog test_hll(Slice((char*)buf, len));
test_hll.update(hash(1024));
{
HyperLogLog other_hll;
for (int i = 0; i < 1024; ++i) {
other_hll.update(hash(i + 1024));
}
test_hll.merge(other_hll);
}
auto cardinality = test_hll.estimate_cardinality();
ASSERT_EQ(sparse_hll.estimate_cardinality(), cardinality);
// 2% error rate
ASSERT_TRUE(cardinality > 1000 && cardinality < 1045);
}
// full [64 * 1024, 128 * 1024)
HyperLogLog full_hll;
{
for (int i = 0; i < 64 * 1024; ++i) {
full_hll.update(hash(64 * 1024 + i));
}
int len = full_hll.serialize(buf);
ASSERT_EQ(HLL_REGISTERS_COUNT + 1, len);
// check serialize
{
Slice str((char*)buf, len);
ASSERT_TRUE(HyperLogLog::is_valid(str));
}
{
Slice str((char*)buf, len + 1);
ASSERT_FALSE(HyperLogLog::is_valid(str));
}
HyperLogLog test_hll(Slice((char*)buf, len));
auto cardinality = test_hll.estimate_cardinality();
ASSERT_EQ(full_hll.estimate_cardinality(), cardinality);
// 2% error rate
ASSERT_TRUE(cardinality > 62 * 1024 && cardinality < 66 * 1024);
}
// merge explicit to empty_hll
{
HyperLogLog new_explicit_hll;
new_explicit_hll.merge(explicit_hll);
ASSERT_EQ(100, new_explicit_hll.estimate_cardinality());
// merge another explicit
{
HyperLogLog other_hll;
for (int i = 100; i < 200; ++i) {
other_hll.update(hash(i));
}
// this is converted to full
other_hll.merge(new_explicit_hll);
ASSERT_TRUE(other_hll.estimate_cardinality() > 190);
}
// merge full
{
new_explicit_hll.merge(full_hll);
ASSERT_TRUE(new_explicit_hll.estimate_cardinality() > full_hll.estimate_cardinality());
}
}
// merge sparse to empty_hll
{
HyperLogLog new_sparse_hll;
new_sparse_hll.merge(sparse_hll);
ASSERT_EQ(sparse_hll.estimate_cardinality(), new_sparse_hll.estimate_cardinality());
// merge explicit
new_sparse_hll.merge(explicit_hll);
ASSERT_TRUE(new_sparse_hll.estimate_cardinality() > sparse_hll.estimate_cardinality());
// merge full
new_sparse_hll.merge(full_hll);
ASSERT_TRUE(new_sparse_hll.estimate_cardinality() > full_hll.estimate_cardinality());
}
}
TEST_F(TestHll, InvalidPtr) {
{
HyperLogLog hll(Slice((char*)nullptr, 0));
ASSERT_EQ(0, hll.estimate_cardinality());
}
{
uint8_t buf[64] = {60};
HyperLogLog hll(Slice(buf, 1));
ASSERT_EQ(0, hll.estimate_cardinality());
}
}
} // namespace doris
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}