blob: dd16955dbaca6a09fa8df3eb27bd8ac29e40c4f7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef _HLLSKETCH_INTERNAL_HPP_
#define _HLLSKETCH_INTERNAL_HPP_
#include "hll.hpp"
#include "HllUtil.hpp"
#include "HllSketchImplFactory.hpp"
#include "CouponList.hpp"
#include "HllArray.hpp"
#include "common_defs.hpp"
#include <cstdio>
#include <cstdlib>
#include <string>
#include <iostream>
#include <sstream>
#include <iomanip>
namespace datasketches {
// Overlays a 64-bit integer and a double on the same storage. The floating
// point update() overloads use it to set a canonical bit pattern (either as a
// double or directly as raw bits) before hashing the bytes.
union longDoubleUnion {
  int64_t longBytes;    // raw 64-bit integer view of the storage
  double doubleBytes;   // floating-point view of the same storage
};
// Constructs an empty sketch.
//
// lg_config_k     - log2 of the configured number of slots; passed through
//                   HllUtil<A>::checkLgK() before anything is allocated
//                   (presumably rejects out-of-range values - confirm in HllUtil).
// tgt_type        - target HLL representation handed to the implementation.
// start_full_size - when true the implementation is obtained from
//                   HllSketchImplFactory<A>::newHll(); otherwise the sketch
//                   starts as a CouponList in LIST mode.
template<typename A>
hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size) {
  HllUtil<A>::checkLgK(lg_config_k);
  if (start_full_size) {
    sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size);
  } else {
    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
    clAlloc alloc;
    auto storage = alloc.allocate(1);
    try {
      sketch_impl = new (storage) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST);
    } catch (...) {
      // Placement-new does not release allocator-provided storage when the
      // constructor throws, so hand it back explicitly before propagating.
      alloc.deallocate(storage, 1);
      throw;
    }
  }
}
// Reads a sketch from a stream.
// The implementation object is produced by HllSketchImplFactory<A>::deserialize()
// and ownership of it passes to the returned sketch.
template<typename A>
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) {
  HllSketchImpl<A>* const restored = HllSketchImplFactory<A>::deserialize(is);
  return hll_sketch_alloc<A>(restored);
}
// Reads a sketch from a memory buffer of `len` bytes starting at `bytes`.
// The implementation object is produced by HllSketchImplFactory<A>::deserialize()
// and ownership of it passes to the returned sketch.
template<typename A>
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) {
  HllSketchImpl<A>* const restored = HllSketchImplFactory<A>::deserialize(bytes, len);
  return hll_sketch_alloc<A>(restored);
}
// Destroys the owned implementation through the deleter it supplies.
// A null implementation pointer (e.g. after being moved from) is skipped.
template<typename A>
hll_sketch_alloc<A>::~hll_sketch_alloc() {
  if (sketch_impl == nullptr) { return; }
  sketch_impl->get_deleter()(sketch_impl);
}
template<typename A>
hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that) :
sketch_impl(that.sketch_impl->copy())
{}
template<typename A>
hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type) :
sketch_impl(that.sketch_impl->copyAs(tgt_type))
{}
// Move constructor: takes over the implementation pointer of `that` and
// leaves `that` holding nullptr.
template<typename A>
hll_sketch_alloc<A>::hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept :
  sketch_impl(that.sketch_impl)
{
  that.sketch_impl = nullptr;
}
template<typename A>
hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
sketch_impl(that)
{}
// Copy assignment: replaces this sketch's implementation with a copy of
// other's implementation.
//
// The copy is made BEFORE the current implementation is destroyed so that:
//  - self-assignment is safe (previously the implementation was destroyed and
//    then copied from, i.e. a use-after-free);
//  - if copy() throws, this sketch still owns its original implementation
//    instead of a dangling pointer that the destructor would free again.
// A sketch that was moved from (null implementation) can also be assigned to.
//
// Returns a copy of *this (the declared return type is by value).
template<typename A>
hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
  if (this != &other) {
    HllSketchImpl<A>* const replacement = other.sketch_impl->copy();
    if (sketch_impl != nullptr) {
      sketch_impl->get_deleter()(sketch_impl);
    }
    sketch_impl = replacement;
  }
  return *this;
}
// Move assignment: exchanges implementation pointers with `other`, so the
// implementation previously held here is released when `other` is destroyed.
// Returns a copy of *this (the declared return type is by value).
template<typename A>
hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
  HllSketchImpl<A>* const previous = sketch_impl;
  sketch_impl = other.sketch_impl;
  other.sketch_impl = previous;
  return *this;
}
// Resets the sketch by asking the current implementation to reset itself and
// adopting whatever implementation pointer it returns.
// NOTE(review): the old pointer is not released here; presumably the
// implementation's reset() disposes of itself when it returns a different
// object - confirm in HllSketchImpl.
template<typename A>
void hll_sketch_alloc<A>::reset() {
  // TODO: need to allow starting from a full-sized sketch
  //       (either here or in other implementation)
  HllSketchImpl<A>* const fresh = sketch_impl->reset();
  sketch_impl = fresh;
}
// Updates the sketch with a string. Empty strings are ignored; otherwise the
// string's bytes are hashed with DEFAULT_SEED and the resulting coupon is
// applied to the sketch.
template<typename A>
void hll_sketch_alloc<A>::update(const std::string& datum) {
  if (!datum.empty()) {
    HashState digest;
    HllUtil<A>::hash(datum.c_str(), datum.length(), DEFAULT_SEED, digest);
    const int coupon = HllUtil<A>::coupon(digest);
    coupon_update(coupon);
  }
}
// Updates the sketch with an unsigned 64-bit value by hashing its 8 bytes
// with DEFAULT_SEED and applying the resulting coupon.
template<typename A>
void hll_sketch_alloc<A>::update(const uint64_t datum) {
  // no sign extension with 64 bits so no need to cast to signed value
  HashState digest;
  HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with an unsigned 32-bit value. The value is converted to
// int32_t and handled by the signed 32-bit overload.
template<typename A>
void hll_sketch_alloc<A>::update(const uint32_t datum) {
  const int32_t as_signed = static_cast<int32_t>(datum);
  update(as_signed);
}
// Updates the sketch with an unsigned 16-bit value. The value is converted to
// int16_t and handled by the signed 16-bit overload.
template<typename A>
void hll_sketch_alloc<A>::update(const uint16_t datum) {
  const int16_t as_signed = static_cast<int16_t>(datum);
  update(as_signed);
}
// Updates the sketch with an unsigned 8-bit value. The value is converted to
// int8_t and handled by the signed 8-bit overload.
template<typename A>
void hll_sketch_alloc<A>::update(const uint8_t datum) {
  const int8_t as_signed = static_cast<int8_t>(datum);
  update(as_signed);
}
// Updates the sketch with a signed 64-bit value by hashing its 8 bytes with
// DEFAULT_SEED and applying the resulting coupon.
template<typename A>
void hll_sketch_alloc<A>::update(const int64_t datum) {
  HashState digest;
  HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with a signed 32-bit value. The value is widened to
// int64_t first, so the 8 bytes of the widened value are what gets hashed.
template<typename A>
void hll_sketch_alloc<A>::update(const int32_t datum) {
  const int64_t widened = static_cast<int64_t>(datum);
  HashState digest;
  HllUtil<A>::hash(&widened, sizeof(int64_t), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with a signed 16-bit value. The value is widened to
// int64_t first, so the 8 bytes of the widened value are what gets hashed.
template<typename A>
void hll_sketch_alloc<A>::update(const int16_t datum) {
  const int64_t widened = static_cast<int64_t>(datum);
  HashState digest;
  HllUtil<A>::hash(&widened, sizeof(int64_t), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with a signed 8-bit value. The value is widened to
// int64_t first, so the 8 bytes of the widened value are what gets hashed.
template<typename A>
void hll_sketch_alloc<A>::update(const int8_t datum) {
  const int64_t widened = static_cast<int64_t>(datum);
  HashState digest;
  HllUtil<A>::hash(&widened, sizeof(int64_t), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with a double. Before hashing, the value is put into a
// canonical 8-byte form: both zeros become +0.0 and every NaN becomes one
// fixed bit pattern, so equal-by-meaning inputs hash identically.
template<typename A>
void hll_sketch_alloc<A>::update(const double datum) {
  longDoubleUnion canonical;
  if (datum == 0.0) {
    canonical.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
  } else if (std::isnan(datum)) {
    canonical.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
  } else {
    canonical.doubleBytes = datum;
  }
  HashState digest;
  HllUtil<A>::hash(&canonical, sizeof(double), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with a float. The value is widened to double and then
// canonicalized exactly like the double overload: both zeros become +0.0 and
// every NaN becomes one fixed bit pattern; the 8 bytes of the result are hashed.
template<typename A>
void hll_sketch_alloc<A>::update(const float datum) {
  const double widened = static_cast<double>(datum);
  longDoubleUnion canonical;
  if (widened == 0.0) {
    canonical.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
  } else if (std::isnan(widened)) {
    canonical.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
  } else {
    canonical.doubleBytes = widened;
  }
  HashState digest;
  HllUtil<A>::hash(&canonical, sizeof(double), DEFAULT_SEED, digest);
  const int coupon = HllUtil<A>::coupon(digest);
  coupon_update(coupon);
}
// Updates the sketch with an arbitrary byte buffer. A null `data` pointer is
// ignored; otherwise `lengthBytes` bytes are hashed with DEFAULT_SEED and the
// resulting coupon is applied.
template<typename A>
void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
  if (data != nullptr) {
    HashState digest;
    HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, digest);
    const int coupon = HllUtil<A>::coupon(digest);
    coupon_update(coupon);
  }
}
// Applies one coupon to the sketch. The EMPTY coupon is ignored.
// couponUpdate() may hand back a different implementation object than the
// current one; when it does, the current one is destroyed through its deleter
// and the returned object becomes the sketch's implementation.
template<typename A>
void hll_sketch_alloc<A>::coupon_update(int coupon) {
  if (coupon == HllUtil<A>::EMPTY) { return; }

  HllSketchImpl<A>* const updated = sketch_impl->couponUpdate(coupon);
  if (updated == sketch_impl) { return; }

  sketch_impl->get_deleter()(sketch_impl);
  sketch_impl = updated;
}
// Writes the sketch to `os` by calling the implementation's serialize()
// with the compact flag set to true.
template<typename A>
void hll_sketch_alloc<A>::serialize_compact(std::ostream& os) const {
  const bool compact = true;
  sketch_impl->serialize(os, compact);
}
// Writes the sketch to `os` by calling the implementation's serialize()
// with the compact flag set to false.
template<typename A>
void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
  const bool compact = false;
  sketch_impl->serialize(os, compact);
}
// Serializes the sketch to a byte vector by calling the implementation's
// serialize() with the compact flag set to true. `header_size_bytes` is
// forwarded unchanged to the implementation.
template<typename A>
vector_u8<A> hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const {
  const bool compact = true;
  return sketch_impl->serialize(compact, header_size_bytes);
}
// Serializes the sketch to a byte vector by calling the implementation's
// serialize() with the compact flag set to false and a header size of 0.
template<typename A>
vector_u8<A> hll_sketch_alloc<A>::serialize_updatable() const {
  const bool compact = false;
  return sketch_impl->serialize(compact, 0);
}
// Builds a human-readable description of the sketch.
//
// summary    - emit the summary section (configuration, bounds, estimate and,
//              depending on the current mode, HLL-array fields or coupon count).
// detail     - emit one table row per entry yielded by the implementation's
//              iterator (HLL array slots in HLL mode, coupons otherwise).
// aux_detail - in HLL mode with an HLL_4 target, emit the aux hash map
//              entries when an aux hash map exists.
// all        - forwarded unchanged to the iterators' begin(); presumably
//              selects whether empty entries are included - confirm in the
//              iterator implementations.
template<typename A>
string<A> hll_sketch_alloc<A>::to_string(const bool summary,
                                         const bool detail,
                                         const bool aux_detail,
                                         const bool all) const {
  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;

  // ---- summary section ----
  if (summary) {
    os << "### HLL sketch summary:" << std::endl;
    os << " Log Config K : " << get_lg_config_k() << std::endl;
    os << " Hll Target : " << type_as_string() << std::endl;
    os << " Current Mode : " << mode_as_string() << std::endl;
    os << " LB : " << get_lower_bound(1) << std::endl;
    os << " Estimate : " << get_estimate() << std::endl;
    os << " UB : " << get_upper_bound(1) << std::endl;
    os << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;

    if (get_current_mode() == HLL) {
      HllArray<A>* const array_impl = static_cast<HllArray<A>*>(sketch_impl);
      os << " CurMin : " << array_impl->getCurMin() << std::endl;
      os << " NumAtCurMin : " << array_impl->getNumAtCurMin() << std::endl;
      os << " HipAccum : " << array_impl->getHipAccum() << std::endl;
      os << " KxQ0 : " << array_impl->getKxQ0() << std::endl;
      os << " KxQ1 : " << array_impl->getKxQ1() << std::endl;
      if (get_target_type() == HLL_4) {
        const Hll4Array<A>* const hll4_impl = static_cast<const Hll4Array<A>*>(sketch_impl);
        os << " Aux table? : " << (hll4_impl->getAuxHashMap() != nullptr ? "true" : "false") << std::endl;
      }
    } else {
      CouponList<A>* const list_impl = static_cast<CouponList<A>*>(sketch_impl);
      os << " Coupon count : "
         << std::to_string(list_impl->getCouponCount()) << std::endl;
    }
    os << "### End HLL sketch summary" << std::endl;
  }

  // ---- data detail section ----
  if (detail) {
    os << "### HLL sketch data detail:" << std::endl;
    if (get_current_mode() == HLL) {
      const HllArray<A>* const array_impl = static_cast<const HllArray<A>*>(sketch_impl);
      os << std::left << std::setw(10) << "Slot" << std::setw(6) << "Value" << std::endl;
      for (auto entry = array_impl->begin(all); entry != array_impl->end(); ++entry) {
        os << std::setw(10) << HllUtil<A>::getLow26(*entry);
        os << std::setw(6) << HllUtil<A>::getValue(*entry);
        os << std::endl;
      }
    } else {
      const CouponList<A>* const list_impl = static_cast<const CouponList<A>*>(sketch_impl);
      os << std::left;
      os << std::setw(10) << "Index";
      os << std::setw(10) << "Key";
      os << std::setw(10) << "Slot";
      os << std::setw(6) << "Value";
      os << std::endl;
      // The slot column is the key reduced to the low lg_config_k bits.
      int row = 0;
      int slot_mask = (1 << get_lg_config_k()) - 1;
      for (auto entry = list_impl->begin(all); entry != list_impl->end(); ++entry, ++row) {
        os << std::setw(10) << row;
        os << std::setw(10) << HllUtil<A>::getLow26(*entry);
        os << std::setw(10) << (HllUtil<A>::getLow26(*entry) & slot_mask);
        os << std::setw(6) << HllUtil<A>::getValue(*entry);
        os << std::endl;
      }
    }
    os << "### End HLL sketch data detail" << std::endl;
  }

  // ---- aux hash map section (HLL mode with HLL_4 target only) ----
  if (aux_detail) {
    if ((get_current_mode() == HLL) && (get_target_type() == HLL_4)) {
      const Hll4Array<A>* const hll4_impl = static_cast<const Hll4Array<A>*>(sketch_impl);
      const AuxHashMap<A>* const aux_map = hll4_impl->getAuxHashMap();
      if (aux_map != nullptr) {
        os << "### HLL sketch aux detail:" << std::endl;
        os << std::left;
        os << std::setw(10) << "Index";
        os << std::setw(10) << "Key";
        os << std::setw(10) << "Slot";
        os << std::setw(6) << "Value";
        os << std::endl;
        int row = 0;
        int slot_mask = (1 << get_lg_config_k()) - 1;
        for (auto entry = aux_map->begin(all); entry != aux_map->end(); ++entry, ++row) {
          os << std::setw(10) << row;
          os << std::setw(10) << HllUtil<A>::getLow26(*entry);
          os << std::setw(10) << (HllUtil<A>::getLow26(*entry) & slot_mask);
          os << std::setw(6) << HllUtil<A>::getValue(*entry);
          os << std::endl;
        }
        os << "### End HLL sketch aux detail" << std::endl;
      }
    }
  }

  return os.str();
}
// Returns the estimate reported by the implementation's getEstimate().
template<typename A>
double hll_sketch_alloc<A>::get_estimate() const {
  const double estimate = sketch_impl->getEstimate();
  return estimate;
}
// Returns the estimate reported by the implementation's getCompositeEstimate().
template<typename A>
double hll_sketch_alloc<A>::get_composite_estimate() const {
  const double estimate = sketch_impl->getCompositeEstimate();
  return estimate;
}
// Returns the implementation's lower bound for the given number of standard
// deviations; `numStdDev` is forwarded unchanged to getLowerBound().
template<typename A>
double hll_sketch_alloc<A>::get_lower_bound(int numStdDev) const {
  const double bound = sketch_impl->getLowerBound(numStdDev);
  return bound;
}
// Returns the implementation's upper bound for the given number of standard
// deviations; `numStdDev` is forwarded unchanged to getUpperBound().
template<typename A>
double hll_sketch_alloc<A>::get_upper_bound(int numStdDev) const {
  const double bound = sketch_impl->getUpperBound(numStdDev);
  return bound;
}
// Returns the mode reported by the implementation's getCurMode().
template<typename A>
hll_mode hll_sketch_alloc<A>::get_current_mode() const {
  const hll_mode mode = sketch_impl->getCurMode();
  return mode;
}
// Returns the value reported by the implementation's getLgConfigK().
template<typename A>
int hll_sketch_alloc<A>::get_lg_config_k() const {
  const int lg_k = sketch_impl->getLgConfigK();
  return lg_k;
}
// Returns the target type reported by the implementation's getTgtHllType().
template<typename A>
target_hll_type hll_sketch_alloc<A>::get_target_type() const {
  const target_hll_type type = sketch_impl->getTgtHllType();
  return type;
}
// Returns the flag reported by the implementation's isOutOfOrderFlag().
template<typename A>
bool hll_sketch_alloc<A>::is_out_of_order_flag() const {
  const bool out_of_order = sketch_impl->isOutOfOrderFlag();
  return out_of_order;
}
// Always reports true; the result does not depend on the sketch's state.
template<typename A>
bool hll_sketch_alloc<A>::is_estimation_mode() const {
  const bool always_estimating = true;
  return always_estimating;
}
// Returns the size reported by the implementation's
// getUpdatableSerializationBytes().
template<typename A>
int hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
  const int num_bytes = sketch_impl->getUpdatableSerializationBytes();
  return num_bytes;
}
// Returns the size reported by the implementation's
// getCompactSerializationBytes().
template<typename A>
int hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
  const int num_bytes = sketch_impl->getCompactSerializationBytes();
  return num_bytes;
}
// Returns the flag reported by the implementation's isCompact().
template<typename A>
bool hll_sketch_alloc<A>::is_compact() const {
  const bool compact = sketch_impl->isCompact();
  return compact;
}
// Returns the flag reported by the implementation's isEmpty().
template<typename A>
bool hll_sketch_alloc<A>::is_empty() const {
  const bool empty = sketch_impl->isEmpty();
  return empty;
}
// Returns the name of the sketch's target HLL type: "HLL_4", "HLL_6" or "HLL_8".
// Throws std::runtime_error if the implementation reports any other value.
template<typename A>
std::string hll_sketch_alloc<A>::type_as_string() const {
  const target_hll_type type = sketch_impl->getTgtHllType();
  if (type == target_hll_type::HLL_4) {
    return std::string("HLL_4");
  }
  if (type == target_hll_type::HLL_6) {
    return std::string("HLL_6");
  }
  if (type == target_hll_type::HLL_8) {
    return std::string("HLL_8");
  }
  throw std::runtime_error("Sketch state error: Invalid target_hll_type");
}
// Returns the name of the sketch's current mode: "LIST", "SET" or "HLL".
// Throws std::runtime_error if the implementation reports any other value.
template<typename A>
std::string hll_sketch_alloc<A>::mode_as_string() const {
  const hll_mode mode = sketch_impl->getCurMode();
  if (mode == LIST) {
    return std::string("LIST");
  }
  if (mode == SET) {
    return std::string("SET");
  }
  if (mode == HLL) {
    return std::string("HLL");
  }
  throw std::runtime_error("Sketch state error: Invalid hll_mode");
}
// Computes the largest updatable serialized size for a configuration:
// HllUtil<A>::HLL_BYTE_ARR_START plus the register-array size for the target
// type. For HLL_4 the size of the aux array (4 bytes per int, with the int
// count taken as a power of two from LG_AUX_ARR_INTS) is added as well.
// Any target type other than HLL_4 or HLL_6 is sized as HLL_8.
// NOTE(review): lg_config_k indexes LG_AUX_ARR_INTS without a range check
// here - callers presumably pass an already-validated value; confirm.
template<typename A>
int hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(const int lg_config_k,
    const target_hll_type tgtHllType) {
  int arrBytes;
  switch (tgtHllType) {
    case target_hll_type::HLL_4: {
      const int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[lg_config_k];
      arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
      break;
    }
    case target_hll_type::HLL_6:
      arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
      break;
    default: // HLL_8
      arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
      break;
  }
  return HllUtil<A>::HLL_BYTE_ARR_START + arrBytes;
}
// Returns the relative error computed by HllUtil<A>::getRelErr(); all four
// arguments are forwarded unchanged and in the same order.
template<typename A>
double hll_sketch_alloc<A>::get_rel_err(const bool upperBound, const bool unioned,
                                        const int lg_config_k, const int numStdDev) {
  const double rel_err = HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
  return rel_err;
}
}
#endif // _HLLSKETCH_INTERNAL_HPP_