// blob: 509469bddbcf9f27d7f85071b397e232cbae8dc3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <catch.hpp>
#include <cstdio>
#include "hll.hpp"
#include "HllUtil.hpp"
namespace datasketches {
/*
// hex format for comparing serialized bytes
// previously used with cppunit testing to display results upon mismatch.
// catch2 testing framework provides such output, but this may be easier for debugging
// with long vectors. keeping the code for now.
static std::string toString(const datasketches::hll_sketch::vector_bytes& v) {
std::ostringstream s;
s << std::hex << std::setfill('0');
int cnt = 0;
for (uint8_t byte: v) {
if (cnt == 8) { // insert space after each 8 bytes for readability
s << ' ';
cnt = 0;
} else {
++cnt;
}
s << std::setw(2) << static_cast<int>(byte);
}
return s.str();
}
*/
// Returns how many distinct values build_sketch() feeds into a sketch for the
// given lg_k and requested mode:
//   LIST -> 4
//   SET  -> 2^(lg_k - 4)
//   HLL  -> 2^lg_k when lg_k < 8, otherwise 2^(lg_k - 3)
// Precondition: for lg_k < 8 the mode must not be SET. The callers in this file
// skip that combination because a list transitions directly to HLL there.
static int get_n(int lg_k, hll_mode mode) {
  switch (mode) {
    case LIST:
      return 4;
    case SET:
      return 1 << (lg_k - 4);
    default:
      break;
  }
  // Small HLL sketches get a full 2^lg_k values.
  if (lg_k < 8 && mode == HLL) {
    return 1 << lg_k;
  }
  return 1 << (lg_k - 3);
}
// Next input value to hand out. build_sketch() advances it after every call, so
// consecutive calls are fed consecutive, non-overlapping ranges of values.
static long v = 0;

// Creates an hll_sketch with the given lg_k and target type, then updates it with
// get_n(lg_k, mode) consecutive values starting at the current value of v.
// Side effect: v is moved past the values that were consumed.
static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode) {
  hll_sketch sketch(lg_k, hll_type);
  const int count = get_n(lg_k, mode);
  const long first = v;
  const long past_last = first + count;
  for (long value = first; value < past_last; ++value) {
    sketch.update(static_cast<uint64_t>(value));
  }
  v = past_last;
  return sketch;
}
// merges a sketch to an empty union and gets result of the same type, checks binary equivalence
static void union_one_update(bool compact) {
for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
target_hll_type hll_type = (target_hll_type) t;
hll_sketch sk1 = build_sketch(lg_k, hll_type, (hll_mode) mode);
hll_union u(lg_k);
u.update(sk1);
hll_sketch sk2 = u.get_result(hll_type);
auto bytes1 = compact ? sk1.serialize_compact() : sk1.serialize_updatable();
auto bytes2 = compact ? sk2.serialize_compact() : sk2.serialize_updatable();
auto msg = "LgK=" + std::to_string(lg_k)
+ ", Mode=" + std::to_string(mode)
+ ", Type=" + std::to_string(hll_type)
+ "\n" + sk1.to_string(true, true, true, true)
+ "\n" + sk2.to_string(true, true, true, true);
if (bytes1 != bytes2) {
std::cerr << msg << std::endl;
REQUIRE(bytes1 == bytes2);
}
}
}
}
}
// Union round trip, compared through the updatable serialization.
TEST_CASE("hll isomorphic: union one update serialize updatable", "[hll_isomorphic]") {
  const bool compact = false;
  union_one_update(compact);
}
// Union round trip, compared through the compact serialization.
TEST_CASE("hll isomorphic: union one update serialize compact", "[hll_isomorphic]") {
  const bool compact = true;
  union_one_update(compact);
}
// converts a sketch to a different type and converts back to the original type to check binary equivalence
static void convert_back_and_forth(bool compact) {
for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8
target_hll_type hll_type1 = (target_hll_type) t1;
hll_sketch sk1 = build_sketch(lg_k, hll_type1, (hll_mode) mode);
auto bytes1 = compact ? sk1.serialize_compact() : sk1.serialize_updatable();
for (int t2 = 0; t2 <= 2; t2++) { // HLL_4, HLL_6, HLL_8
if (t2 == t1) continue;
target_hll_type hll_type2 = (target_hll_type) t2;
hll_sketch sk2(hll_sketch(sk1, hll_type2), hll_type1);
auto bytes2 = compact ? sk2.serialize_compact() : sk2.serialize_updatable();
auto msg = "LgK=" + std::to_string(lg_k)
+ ", Mode=" + std::to_string(mode)
+ ", Type1=" + std::to_string(hll_type1)
+ ", Type2=" + std::to_string(hll_type2)
+ "\n" + sk1.to_string(true, true, true, true)
+ "\n" + sk2.to_string(true, true, true, true);
if (bytes1 != bytes2) {
std::cerr << msg << std::endl;
REQUIRE(bytes1 == bytes2);
}
}
}
}
}
}
// Type conversion round trip, compared through the updatable serialization.
TEST_CASE("hll isomorphic: convert back and forth serialize updatable", "[hll_isomorphic]") {
  const bool compact = false;
  convert_back_and_forth(compact);
}
// Type conversion round trip, compared through the compact serialization.
TEST_CASE("hll isomorphic: convert back and forth serialize compact", "[hll_isomorphic]") {
  const bool compact = true;
  convert_back_and_forth(compact);
}
} /* namespace datasketches */