// blob: e941b5b899f92a2abb168cefd1bc0d4ef7296c8f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "hll.hpp"
//#include "HllArray.hpp"
//#include "HllSketch.hpp"
#include <exception>
#include <sstream>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
namespace datasketches {
class HllArrayTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(HllArrayTest);
CPPUNIT_TEST(checkCompositeEstimate);
CPPUNIT_TEST(checkSerializeDeserialize);
CPPUNIT_TEST(checkIsCompact);
CPPUNIT_TEST(checkCorruptBytearray);
CPPUNIT_TEST(checkCorruptStream);
CPPUNIT_TEST_SUITE_END();
void testComposite(const int lgK, const target_hll_type tgtHllType, const int n) {
hll_union u(lgK);
hll_sketch sk(lgK, tgtHllType);
for (int i = 0; i < n; ++i) {
u.update(i);
sk.update(i);
}
u.update(sk); // merge
hll_sketch res = u.get_result(target_hll_type::HLL_8);
double est = res.get_composite_estimate();
CPPUNIT_ASSERT_DOUBLES_EQUAL(est, sk.get_composite_estimate(), 0.0);
}
void checkCompositeEstimate() {
testComposite(4, target_hll_type::HLL_8, 10000);
testComposite(5, target_hll_type::HLL_8, 10000);
testComposite(6, target_hll_type::HLL_8, 10000);
testComposite(13, target_hll_type::HLL_8, 10000);
}
void checkSerializeDeserialize() {
int lgK = 4;
int n = 8;
serializeDeserialize(lgK, HLL_4, n);
serializeDeserialize(lgK, HLL_6, n);
serializeDeserialize(lgK, HLL_8, n);
lgK = 15;
n = (((1 << (lgK - 3))*3)/4) + 100;
serializeDeserialize(lgK, HLL_4, n);
serializeDeserialize(lgK, HLL_6, n);
serializeDeserialize(lgK, HLL_8, n);
lgK = 21;
n = (((1 << (lgK - 3))*3)/4) + 1000;
serializeDeserialize(lgK, HLL_4, n);
serializeDeserialize(lgK, HLL_6, n);
serializeDeserialize(lgK, HLL_8, n);
}
void serializeDeserialize(const int lgK, target_hll_type tgtHllType, const int n) {
hll_sketch sk1(lgK, tgtHllType);
for (int i = 0; i < n; ++i) {
sk1.update(i);
}
//CPPUNIT_ASSERT(sk1.getCurrentMode() == CurMode::HLL);
double est1 = sk1.get_estimate();
CPPUNIT_ASSERT_DOUBLES_EQUAL(n, est1, n * 0.03);
// serialize as compact and updatable, deserialize, compare estimates are exact
std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
sk1.serialize_compact(ss);
hll_sketch sk2 = hll_sketch::deserialize(ss);
CPPUNIT_ASSERT_DOUBLES_EQUAL(sk2.get_estimate(), sk1.get_estimate(), 0.0);
ss.clear();
sk1.serialize_updatable(ss);
sk2 = hll_sketch::deserialize(ss);
CPPUNIT_ASSERT_DOUBLES_EQUAL(sk2.get_estimate(), sk1.get_estimate(), 0.0);
sk1.reset();
CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, sk1.get_estimate(), 0.0);
}
void checkIsCompact() {
hll_sketch sk(4);
for (int i = 0; i < 8; ++i) {
sk.update(i);
}
CPPUNIT_ASSERT(!sk.is_compact());
}
void checkCorruptBytearray() {
int lgK = 8;
hll_sketch sk1(lgK, HLL_8);
for (int i = 0; i < 50; ++i) {
sk1.update(i);
}
std::pair<byte_ptr_with_deleter, size_t> sketchBytes = sk1.serialize_compact();
uint8_t* bytes = sketchBytes.first.get();
bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0;
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in preInts byte",
hll_sketch::deserialize(bytes, sketchBytes.second),
std::invalid_argument);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in preInts byte",
HllArray<>::newHll(bytes, sketchBytes.second),
std::invalid_argument);
bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HLL_PREINTS;
bytes[HllUtil<>::SER_VER_BYTE] = 0;
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in serialization version byte",
hll_sketch::deserialize(bytes, sketchBytes.second),
std::invalid_argument);
bytes[HllUtil<>::SER_VER_BYTE] = HllUtil<>::SER_VER;
bytes[HllUtil<>::FAMILY_BYTE] = 0;
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in family id byte",
hll_sketch::deserialize(bytes, sketchBytes.second),
std::invalid_argument);
bytes[HllUtil<>::FAMILY_BYTE] = HllUtil<>::FAMILY_ID;
uint8_t tmp = bytes[HllUtil<>::MODE_BYTE];
bytes[HllUtil<>::MODE_BYTE] = 0x10; // HLL_6, LIST
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in mode byte",
hll_sketch::deserialize(bytes, sketchBytes.second),
std::invalid_argument);
bytes[HllUtil<>::MODE_BYTE] = tmp;
tmp = bytes[HllUtil<>::LG_ARR_BYTE];
bytes[HllUtil<>::LG_ARR_BYTE] = 0;
hll_sketch::deserialize(bytes, sketchBytes.second);
// should work fine despite the corruption
bytes[HllUtil<>::LG_ARR_BYTE] = tmp;
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in serialized length",
hll_sketch::deserialize(bytes, sketchBytes.second - 1),
std::invalid_argument);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in serialized length",
hll_sketch::deserialize(bytes, 3),
std::invalid_argument);
}
void checkCorruptStream() {
int lgK = 6;
hll_sketch sk1(lgK);
for (int i = 0; i < 50; ++i) {
sk1.update(i);
}
std::stringstream ss;
sk1.serialize_compact(ss);
ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
ss.put(0);
ss.seekg(0);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in preInts byte",
hll_sketch::deserialize(ss),
std::invalid_argument);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in preInts byte",
HllArray<>::newHll(ss),
std::invalid_argument);
ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
ss.put(HllUtil<>::HLL_PREINTS);
ss.seekp(HllUtil<>::SER_VER_BYTE);
ss.put(0);
ss.seekg(0);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in serialization version byte",
hll_sketch::deserialize(ss),
std::invalid_argument);
ss.seekp(HllUtil<>::SER_VER_BYTE);
ss.put(HllUtil<>::SER_VER);
ss.seekp(HllUtil<>::FAMILY_BYTE);
ss.put(0);
ss.seekg(0);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in family id byte",
hll_sketch::deserialize(ss),
std::invalid_argument);
ss.seekp(HllUtil<>::FAMILY_BYTE);
ss.put(HllUtil<>::FAMILY_ID);
ss.seekg(HllUtil<>::MODE_BYTE);
uint8_t tmp = ss.get();
ss.seekp(HllUtil<>::MODE_BYTE);
ss.put(0x11); // HLL_6, SET
ss.seekg(0);
CPPUNIT_ASSERT_THROW_MESSAGE("Failed to detect error in mode byte",
hll_sketch::deserialize(ss),
std::invalid_argument);
ss.seekp(HllUtil<>::MODE_BYTE);
ss.put(tmp);
ss.seekg(HllUtil<>::LG_ARR_BYTE);
tmp = ss.get();
ss.seekp(HllUtil<>::LG_ARR_BYTE);
ss.put(0);
ss.seekg(0);
hll_sketch::deserialize(ss);
// should work fine despite the corruption
ss.seekp(HllUtil<>::LG_ARR_BYTE);
ss.put(tmp);
}
};
// Register the HllArrayTest suite with CppUnit's test factory registry so a test runner can pick it up.
CPPUNIT_TEST_SUITE_REGISTRATION(HllArrayTest);
} /* namespace datasketches */