| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #ifndef _HLL4ARRAY_INTERNAL_HPP_ |
| #define _HLL4ARRAY_INTERNAL_HPP_ |
| |
| #include "Hll4Array.hpp" |
| |
| #include <cstring> |
| #include <memory> |
| #include <stdexcept> |
| #include <string> |
| |
| namespace datasketches { |
| |
| template<typename A> |
| Hll4Array<A>::Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator): |
| HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator), |
| auxHashMap_(nullptr) |
| { |
| const uint32_t numBytes = this->hll4ArrBytes(lgConfigK); |
| this->hllByteArr_.resize(numBytes, 0); |
| } |
| |
| template<typename A> |
| Hll4Array<A>::Hll4Array(const Hll4Array<A>& that) : |
| HllArray<A>(that) |
| { |
| // can determine hllByteArr size in parent class, no need to allocate here |
| // but parent class doesn't handle the auxHashMap |
| if (that.auxHashMap_ != nullptr) { |
| auxHashMap_ = that.auxHashMap_->copy(); |
| } else { |
| auxHashMap_ = nullptr; |
| } |
| } |
| |
| template<typename A> |
| Hll4Array<A>::~Hll4Array() { |
| // hllByteArr deleted in parent |
| if (auxHashMap_ != nullptr) { |
| AuxHashMap<A>::make_deleter()(auxHashMap_); |
| } |
| } |
| |
| template<typename A> |
| std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const { |
| return [](HllSketchImpl<A>* ptr) { |
| Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr); |
| using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>; |
| Hll4Alloc hll4Alloc(hll->getAllocator()); |
| hll->~Hll4Array(); |
| hll4Alloc.deallocate(hll, 1); |
| }; |
| } |
| |
| template<typename A> |
| Hll4Array<A>* Hll4Array<A>::copy() const { |
| using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>; |
| Hll4Alloc hll4Alloc(this->getAllocator()); |
| return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this); |
| } |
| |
| template<typename A> |
| uint32_t Hll4Array<A>::getUpdatableSerializationBytes() const { |
| AuxHashMap<A>* auxHashMap = getAuxHashMap(); |
| uint32_t auxBytes; |
| if (auxHashMap == nullptr) { |
| auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_]; |
| } else { |
| auxBytes = 4 << auxHashMap->getLgAuxArrInts(); |
| } |
| return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxBytes; |
| } |
| |
| template<typename A> |
| uint32_t Hll4Array<A>::getHllByteArrBytes() const { |
| return this->hll4ArrBytes(this->lgConfigK_); |
| } |
| |
| template<typename A> |
| AuxHashMap<A>* Hll4Array<A>::getAuxHashMap() const { |
| return auxHashMap_; |
| } |
| |
| template<typename A> |
| void Hll4Array<A>::putAuxHashMap(AuxHashMap<A>* auxHashMap) { |
| this->auxHashMap_ = auxHashMap; |
| } |
| |
| template<typename A> |
| uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const { |
| const uint8_t byte = this->hllByteArr_[slotNo >> 1]; |
| if ((slotNo & 1) > 0) { // odd? |
| return byte >> 4; |
| } |
| return byte & hll_constants::loNibbleMask; |
| } |
| |
| template<typename A> |
| uint8_t Hll4Array<A>::get_value(uint32_t index) const { |
| const uint8_t value = getSlot(index); |
| if (value != hll_constants::AUX_TOKEN) return value + this->curMin_; |
| return auxHashMap_->mustFindValueFor(index); |
| } |
| |
| template<typename A> |
| HllSketchImpl<A>* Hll4Array<A>::couponUpdate(uint32_t coupon) { |
| internalCouponUpdate(coupon); |
| return this; |
| } |
| |
| template<typename A> |
| void Hll4Array<A>::internalCouponUpdate(uint32_t coupon) { |
| const uint8_t newValue = HllUtil<A>::getValue(coupon); |
| if (newValue <= this->curMin_) { |
| return; // quick rejection, but only works for large N |
| } |
| const uint32_t configKmask = (1 << this->lgConfigK_) - 1; |
| const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask; |
| internalHll4Update(slotNo, newValue); |
| } |
| |
| template<typename A> |
| void Hll4Array<A>::putSlot(uint32_t slotNo, uint8_t newValue) { |
| const uint32_t byteno = slotNo >> 1; |
| const uint8_t oldValue = this->hllByteArr_[byteno]; |
| if ((slotNo & 1) == 0) { // set low nibble |
| this->hllByteArr_[byteno] |
| = ((oldValue & hll_constants::hiNibbleMask) | (newValue & hll_constants::loNibbleMask)); |
| } else { // set high nibble |
| this->hllByteArr_[byteno] |
| = ((oldValue & hll_constants::loNibbleMask) | ((newValue << 4) & hll_constants::hiNibbleMask)); |
| } |
| } |
| |
| //In C: two-registers.c Line 836 in "hhb_abstract_set_slot_if_new_value_bigger" non-sparse |
| template<typename A> |
| void Hll4Array<A>::internalHll4Update(uint32_t slotNo, uint8_t newVal) { |
| |
| const uint8_t rawStoredOldValue = getSlot(slotNo); // could be a 0 |
| // this is provably a LB: |
| const uint8_t lbOnOldValue = rawStoredOldValue + this->curMin_; // lower bound, could be 0 |
| |
| if (newVal > lbOnOldValue) { // 842 |
| // Note: if an AUX_TOKEN exists, then auxHashMap must already exist |
| // 846: rawStoredOldValue == AUX_TOKEN |
| const uint8_t actualOldValue = (rawStoredOldValue < hll_constants::AUX_TOKEN) |
| ? (lbOnOldValue) : (auxHashMap_->mustFindValueFor(slotNo)); |
| |
| if (newVal > actualOldValue) { // 848: actualOldValue could still be 0; newValue > 0 |
| // we know that the array will change, but we haven't actually updated yet |
| this->hipAndKxQIncrementalUpdate(actualOldValue, newVal); |
| |
| // newVal >= curMin |
| |
| const uint8_t shiftedNewValue = newVal - this->curMin_; // 874 |
| // redundant since we know newVal >= curMin, |
| // and lgConfigK bounds do not allow overflowing an int |
| //assert(shiftedNewValue >= 0); |
| |
| if (rawStoredOldValue == hll_constants::AUX_TOKEN) { // 879 |
| // Given that we have an AUX_TOKEN, there are 4 cases for how to |
| // actually modify the data structure |
| |
| if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 1: 881 |
| // the byte array already contains aux token |
| // This is the case where old and new values are both exceptions. |
| // The 4-bit array already is AUX_TOKEN, only need to update auxHashMap |
| auxHashMap_->mustReplace(slotNo, newVal); |
| } |
| else { // case 2: 885 |
| // This is the hypothetical case where the old value is an exception and the new one is not, |
| // which is impossible given that curMin has not changed here and newVal > oldValue |
| } |
| } else { // rawStoredOldValue != AUX_TOKEN |
| if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 3: 892 |
| // This is the case where the old value is not an exception and the new value is. |
| // The AUX_TOKEN must be stored in the 4-bit array and the new value |
| // added to the exception table |
| putSlot(slotNo, hll_constants::AUX_TOKEN); |
| if (auxHashMap_ == nullptr) { |
| auxHashMap_ = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_], |
| this->lgConfigK_, this->getAllocator()); |
| } |
| auxHashMap_->mustAdd(slotNo, newVal); |
| } |
| else { // case 4: 897 |
| // This is the case where neither the old value nor the new value is an exception. |
| // We just overwrite the 4-bit array with the shifted new value. |
| putSlot(slotNo, shiftedNewValue); |
| } |
| } |
| |
| // we just increased a pair value, so it might be time to change curMin |
| if (actualOldValue == this->curMin_) { // 908 |
| this->decNumAtCurMin(); |
| while (this->numAtCurMin_ == 0) { |
| shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table |
| // shifts values in 4-bit table and recounts curMin |
| } |
| } |
| } // end newVal <= actualOldValue |
| } // end newValue <= lbOnOldValue -> return, no need to update array |
| } |
| |
| // This scheme only works with two double registers (2 kxq values). |
| // HipAccum, kxq0 and kxq1 remain untouched. |
| // This changes curMin, numAtCurMin, hllByteArr and auxMap. |
| // Entering this routine assumes that all slots have valid values > 0 and <= 15. |
| // An AuxHashMap must exist if any values in the current hllByteArray are already 15. |
| // In C: again-two-registers.c Lines 710 "hhb_shift_to_bigger_curmin" |
| template<typename A> |
| void Hll4Array<A>::shiftToBiggerCurMin() { |
| const uint8_t newCurMin = this->curMin_ + 1; |
| const uint32_t configK = 1 << this->lgConfigK_; |
| const uint32_t configKmask = configK - 1; |
| |
| uint32_t numAtNewCurMin = 0; |
| uint32_t numAuxTokens = 0; |
| |
| // Walk through the slots of 4-bit array decrementing stored values by one unless it |
| // equals AUX_TOKEN, where it is left alone but counted to be checked later. |
| // If oldStoredValue is 0 it is an error. |
| // If the decremented value is 0, we increment numAtNewCurMin. |
| // Because getNibble is masked to 4 bits oldStoredValue can never be > 15 or negative |
| for (uint32_t i = 0; i < configK; i++) { //724 |
| uint8_t oldStoredValue = getSlot(i); |
| if (oldStoredValue == 0) { |
| throw std::runtime_error("Array slots cannot be 0 at this point."); |
| } |
| if (oldStoredValue < hll_constants::AUX_TOKEN) { |
| putSlot(i, --oldStoredValue); |
| if (oldStoredValue == 0) { numAtNewCurMin++; } |
| } else { //oldStoredValue == AUX_TOKEN |
| numAuxTokens++; |
| if (auxHashMap_ == nullptr) { |
| throw std::logic_error("auxHashMap cannot be null at this point"); |
| } |
| } |
| } |
| |
| // If old AuxHashMap exists, walk through it updating some slots and build a new AuxHashMap |
| // if needed. |
| AuxHashMap<A>* newAuxMap = nullptr; |
| if (auxHashMap_ != nullptr) { |
| uint32_t slotNum; |
| uint8_t oldActualVal; |
| uint8_t newShiftedVal; |
| |
| for (const auto coupon: *auxHashMap_) { |
| slotNum = HllUtil<A>::getLow26(coupon) & configKmask; |
| oldActualVal = HllUtil<A>::getValue(coupon); |
| newShiftedVal = oldActualVal - newCurMin; |
| if (newShiftedVal < 0) { |
| throw std::logic_error("oldActualVal < newCurMin when incrementing curMin"); |
| } |
| |
| if (getSlot(slotNum) != hll_constants::AUX_TOKEN) { |
| throw std::logic_error("getSlot(slotNum) != AUX_TOKEN for item in auxiliary hash map"); |
| } |
| // Array slot != AUX_TOKEN at getSlot(slotNum); |
| if (newShiftedVal < hll_constants::AUX_TOKEN) { // 756 |
| if (newShiftedVal != 14) { |
| throw std::logic_error("newShiftedVal != 14 for item in old auxHashMap despite curMin increment"); |
| } |
| // The former exception value isn't one anymore, so it stays out of new AuxHashMap. |
| // Correct the AUX_TOKEN value in the HLL array to the newShiftedVal (14). |
| putSlot(slotNum, newShiftedVal); |
| numAuxTokens--; |
| } else { //newShiftedVal >= AUX_TOKEN |
| // the former exception remains an exception, so must be added to the newAuxMap |
| if (newAuxMap == nullptr) { |
| newAuxMap = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_], |
| this->lgConfigK_, this->getAllocator()); |
| } |
| newAuxMap->mustAdd(slotNum, oldActualVal); |
| } |
| } //end scan of oldAuxMap |
| } //end if (auxHashMap != null) |
| else { // oldAuxMap == null |
| if (numAuxTokens != 0) { |
| throw std::logic_error("No auxiliary hash map, but numAuxTokens != 0"); |
| } |
| } |
| |
| if (newAuxMap != nullptr) { |
| if (newAuxMap->getAuxCount() != numAuxTokens) { |
| throw std::runtime_error("Inconsistent counts: auxCount: " + std::to_string(newAuxMap->getAuxCount()) |
| + ", HLL tokesn: " + std::to_string(numAuxTokens)); |
| } |
| } |
| |
| if (auxHashMap_ != nullptr) { |
| AuxHashMap<A>::make_deleter()(auxHashMap_); |
| } |
| auxHashMap_ = newAuxMap; |
| |
| this->curMin_ = newCurMin; |
| this->numAtCurMin_ = numAtNewCurMin; |
| } |
| |
| template<typename A> |
| typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const { |
| return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_, |
| auxHashMap_, this->curMin_, all); |
| } |
| |
| template<typename A> |
| typename HllArray<A>::const_iterator Hll4Array<A>::end() const { |
| return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_, |
| this->tgtHllType_, auxHashMap_, this->curMin_, false); |
| } |
| |
| template<typename A> |
| void Hll4Array<A>::mergeHll(const HllArray<A>& src) { |
| for (const auto coupon: src) { |
| internalCouponUpdate(coupon); |
| } |
| } |
| |
| } |
| |
| #endif // _HLL4ARRAY_INTERNAL_HPP_ |