| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #ifndef _COUPONLIST_INTERNAL_HPP_ |
| #define _COUPONLIST_INTERNAL_HPP_ |
| |
| #include "CouponList.hpp" |
| #include "CubicInterpolation.hpp" |
| #include "HllUtil.hpp" |
| #include "IntArrayPairIterator.hpp" |
| |
| #include <iostream> |
| #include <cstring> |
| #include <cmath> |
| #include <algorithm> |
| |
| namespace datasketches { |
| |
| template<typename A> |
| CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode) |
| : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) { |
| if (mode == hll_mode::LIST) { |
| lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE; |
| oooFlag = false; |
| } else { // mode == SET |
| lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE; |
| oooFlag = true; |
| } |
| const int arrayLen = 1 << lgCouponArrInts; |
| typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; |
| couponIntArr = intAlloc().allocate(arrayLen); |
| std::fill(couponIntArr, couponIntArr + arrayLen, 0); |
| couponCount = 0; |
| } |
| |
| template<typename A> |
| CouponList<A>::CouponList(const CouponList& that) |
| : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false), |
| lgCouponArrInts(that.lgCouponArrInts), |
| couponCount(that.couponCount), |
| oooFlag(that.oooFlag) { |
| |
| const int numItems = 1 << lgCouponArrInts; |
| typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; |
| couponIntArr = intAlloc().allocate(numItems); |
| std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr); |
| } |
| |
| template<typename A> |
| CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType) |
| : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false), |
| lgCouponArrInts(that.lgCouponArrInts), |
| couponCount(that.couponCount), |
| oooFlag(that.oooFlag) { |
| |
| const int numItems = 1 << lgCouponArrInts; |
| typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; |
| couponIntArr = intAlloc().allocate(numItems); |
| std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr); |
| } |
| |
| template<typename A> |
| CouponList<A>::~CouponList() { |
| typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; |
| intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts); |
| } |
| |
| template<typename A> |
| std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const { |
| return [](HllSketchImpl<A>* ptr) { |
| CouponList<A>* cl = static_cast<CouponList<A>*>(ptr); |
| cl->~CouponList(); |
| clAlloc().deallocate(cl, 1); |
| }; |
| } |
| |
| template<typename A> |
| CouponList<A>* CouponList<A>::copy() const { |
| return new (clAlloc().allocate(1)) CouponList<A>(*this); |
| } |
| |
| template<typename A> |
| CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const { |
| return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType); |
| } |
| |
| template<typename A> |
| CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) { |
| if (len < HllUtil<A>::LIST_INT_ARR_START) { |
| throw std::invalid_argument("Input data length insufficient to hold CouponHashSet"); |
| } |
| |
| const uint8_t* data = static_cast<const uint8_t*>(bytes); |
| if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) { |
| throw std::invalid_argument("Incorrect number of preInts in input stream"); |
| } |
| if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) { |
| throw std::invalid_argument("Wrong ser ver in input stream"); |
| } |
| if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) { |
| throw std::invalid_argument("Input stream is not an HLL sketch"); |
| } |
| |
| hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]); |
| if (mode != LIST) { |
| throw std::invalid_argument("Calling set construtor with non-list mode data"); |
| } |
| |
| target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]); |
| |
| const int lgK = data[HllUtil<A>::LG_K_BYTE]; |
| const bool compact = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false); |
| const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false); |
| const bool emptyFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false); |
| |
| const int couponCount = data[HllUtil<A>::LIST_COUNT_BYTE]; |
| const int couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK))); |
| const size_t expectedLength = HllUtil<A>::LIST_INT_ARR_START + (couponsInArray * sizeof(int)); |
| if (len < expectedLength) { |
| throw std::invalid_argument("Byte array too short for sketch. Expected " + std::to_string(expectedLength) |
| + ", found: " + std::to_string(len)); |
| } |
| |
| CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode); |
| sketch->couponCount = couponCount; |
| sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST |
| |
| if (!emptyFlag) { |
| // only need to read valid coupons, unlike in stream case |
| std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int)); |
| } |
| |
| return sketch; |
| } |
| |
| template<typename A> |
| CouponList<A>* CouponList<A>::newList(std::istream& is) { |
| uint8_t listHeader[8]; |
| is.read((char*)listHeader, 8 * sizeof(uint8_t)); |
| |
| if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) { |
| throw std::invalid_argument("Incorrect number of preInts in input stream"); |
| } |
| if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) { |
| throw std::invalid_argument("Wrong ser ver in input stream"); |
| } |
| if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) { |
| throw std::invalid_argument("Input stream is not an HLL sketch"); |
| } |
| |
| hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]); |
| if (mode != LIST) { |
| throw std::invalid_argument("Calling list construtor with non-list mode data"); |
| } |
| |
| const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]); |
| |
| const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE]; |
| const bool compact = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false); |
| const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false); |
| const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false); |
| |
| CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode); |
| const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE]; |
| sketch->couponCount = couponCount; |
| sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST |
| |
| if (!emptyFlag) { |
| // For stream processing, need to read entire number written to stream so read |
| // pointer ends up set correctly. |
| // If not compact, still need to read empty items even though in order. |
| const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts)); |
| is.read((char*)sketch->couponIntArr, numToRead * sizeof(int)); |
| } |
| |
| return sketch; |
| } |
| |
| template<typename A> |
| std::pair<std::unique_ptr<uint8_t, std::function<void(uint8_t*)>>, const size_t> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const { |
| const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes; |
| typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; |
| std::unique_ptr<uint8_t, std::function<void(uint8_t*)>> byteArr( |
| uint8Alloc().allocate(sketchSizeBytes), |
| [sketchSizeBytes](uint8_t* p){ uint8Alloc().deallocate(p, sketchSizeBytes); } |
| ); |
| |
| uint8_t* bytes = byteArr.get() + header_size_bytes; |
| |
| bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts()); |
| bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER); |
| bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID); |
| bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK); |
| bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts); |
| bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact); |
| bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0); |
| bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte(); |
| |
| if (this->mode == SET) { |
| std::memcpy(bytes + HllUtil<A>::HASH_SET_COUNT_INT, &couponCount, sizeof(couponCount)); |
| } |
| |
| // coupons |
| // isCompact() is always false for now |
| const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); |
| switch (sw) { |
| case 0: { // src updatable, dst updatable |
| std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int)); |
| break; |
| } |
| case 1: { // src updatable, dst compact |
| pair_iterator_with_deleter<A> itr = getIterator(); |
| bytes += getMemDataStart(); // reusing ponter for incremental writes |
| while (itr->nextValid()) { |
| const int pairValue = itr->getPair(); |
| std::memcpy(bytes, &pairValue, sizeof(pairValue)); |
| bytes += sizeof(pairValue); |
| } |
| break; |
| } |
| |
| default: |
| throw std::runtime_error("Impossible condition when serializing"); |
| } |
| |
| return std::make_pair(std::move(byteArr), sketchSizeBytes); |
| } |
| |
| template<typename A> |
| void CouponList<A>::serialize(std::ostream& os, const bool compact) const { |
| // header |
| const uint8_t preInts(getPreInts()); |
| os.write((char*)&preInts, sizeof(preInts)); |
| const uint8_t serialVersion(HllUtil<A>::SER_VER); |
| os.write((char*)&serialVersion, sizeof(serialVersion)); |
| const uint8_t familyId(HllUtil<A>::FAMILY_ID); |
| os.write((char*)&familyId, sizeof(familyId)); |
| const uint8_t lgKByte((uint8_t) this->lgConfigK); |
| os.write((char*)&lgKByte, sizeof(lgKByte)); |
| const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts); |
| os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte)); |
| const uint8_t flagsByte(this->makeFlagsByte(compact)); |
| os.write((char*)&flagsByte, sizeof(flagsByte)); |
| |
| if (this->mode == LIST) { |
| const uint8_t listCount((uint8_t) couponCount); |
| os.write((char*)&listCount, sizeof(listCount)); |
| } else { // mode == SET |
| const uint8_t unused(0); |
| os.write((char*)&unused, sizeof(unused)); |
| } |
| |
| const uint8_t modeByte(this->makeModeByte()); |
| os.write((char*)&modeByte, sizeof(modeByte)); |
| |
| if (this->mode == SET) { |
| // writing as int, already stored as int |
| os.write((char*)&couponCount, sizeof(couponCount)); |
| } |
| |
| // coupons |
| // isCompact() is always false for now |
| const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); |
| switch (sw) { |
| case 0: { // src updatable, dst updatable |
| os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int)); |
| break; |
| } |
| case 1: { // src updatable, dst compact |
| pair_iterator_with_deleter<A> itr = getIterator(); |
| while (itr->nextValid()) { |
| const int pairValue = itr->getPair(); |
| os.write((char*)&pairValue, sizeof(pairValue)); |
| } |
| break; |
| } |
| |
| default: |
| throw std::runtime_error("Impossible condition when serializing"); |
| } |
| |
| return; |
| } |
| |
| template<typename A> |
| HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) { |
| const int len = 1 << lgCouponArrInts; |
| for (int i = 0; i < len; ++i) { // search for empty slot |
| const int couponAtIdx = couponIntArr[i]; |
| if (couponAtIdx == HllUtil<A>::EMPTY) { |
| couponIntArr[i] = coupon; // the actual update |
| ++couponCount; |
| if (couponCount >= len) { // array full |
| if (this->lgConfigK < 8) { |
| return promoteHeapListOrSetToHll(*this); // oooFlag = false |
| } |
| return promoteHeapListToSet(*this); // oooFlag = true; |
| } |
| return this; |
| } |
| // cell not empty |
| if (couponAtIdx == coupon) { |
| return this; // duplicate |
| } |
| // cell not empty and not a duplicate, continue |
| } |
| throw std::runtime_error("Array invalid: no empties and no duplicates"); |
| } |
| |
| template<typename A> |
| double CouponList<A>::getCompositeEstimate() const { return getEstimate(); } |
| |
| template<typename A> |
| double CouponList<A>::getEstimate() const { |
| const int couponCount = getCouponCount(); |
| const double est = CubicInterpolation<A>::usingXAndYTables(couponCount); |
| return fmax(est, couponCount); |
| } |
| |
| template<typename A> |
| double CouponList<A>::getLowerBound(const int numStdDev) const { |
| HllUtil<A>::checkNumStdDev(numStdDev); |
| const int couponCount = getCouponCount(); |
| const double est = CubicInterpolation<A>::usingXAndYTables(couponCount); |
| const double tmp = est / (1.0 + (numStdDev * HllUtil<A>::COUPON_RSE)); |
| return fmax(tmp, couponCount); |
| } |
| |
| template<typename A> |
| double CouponList<A>::getUpperBound(const int numStdDev) const { |
| HllUtil<A>::checkNumStdDev(numStdDev); |
| const int couponCount = getCouponCount(); |
| const double est = CubicInterpolation<A>::usingXAndYTables(couponCount); |
| const double tmp = est / (1.0 - (numStdDev * HllUtil<A>::COUPON_RSE)); |
| return fmax(tmp, couponCount); |
| } |
| |
| template<typename A> |
| bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; } |
| |
| template<typename A> |
| int CouponList<A>::getUpdatableSerializationBytes() const { |
| return getMemDataStart() + (4 << getLgCouponArrInts()); |
| } |
| |
| template<typename A> |
| int CouponList<A>::getCouponCount() const { |
| return couponCount; |
| } |
| |
| template<typename A> |
| int CouponList<A>::getCompactSerializationBytes() const { |
| return getMemDataStart() + (couponCount << 2); |
| } |
| |
| template<typename A> |
| int CouponList<A>::getMemDataStart() const { |
| return HllUtil<A>::LIST_INT_ARR_START; |
| } |
| |
| template<typename A> |
| int CouponList<A>::getPreInts() const { |
| return HllUtil<A>::LIST_PREINTS; |
| } |
| |
| template<typename A> |
| bool CouponList<A>::isCompact() const { return false; } |
| |
| template<typename A> |
| bool CouponList<A>::isOutOfOrderFlag() const { return oooFlag; } |
| |
| template<typename A> |
| void CouponList<A>::putOutOfOrderFlag(bool oooFlag) { |
| this->oooFlag = oooFlag; |
| } |
| |
| template<typename A> |
| int CouponList<A>::getLgCouponArrInts() const { |
| return lgCouponArrInts; |
| } |
| |
| template<typename A> |
| int* CouponList<A>::getCouponIntArr() const { |
| return couponIntArr; |
| } |
| |
| template<typename A> |
| pair_iterator_with_deleter<A> CouponList<A>::getIterator() const { |
| typedef typename std::allocator_traits<A>::template rebind_alloc<IntArrayPairIterator<A>> iapiAlloc; |
| IntArrayPairIterator<A>* itr = new (iapiAlloc().allocate(1)) IntArrayPairIterator<A>(couponIntArr, 1 << lgCouponArrInts, this->lgConfigK); |
| return pair_iterator_with_deleter<A>( |
| itr, |
| [](PairIterator<A>* ptr) { |
| IntArrayPairIterator<A>* iapi = static_cast<IntArrayPairIterator<A>*>(ptr); |
| iapi->~IntArrayPairIterator(); |
| iapiAlloc().deallocate(iapi, 1); |
| } |
| ); |
| } |
| |
| template<typename A> |
| HllSketchImpl<A>* CouponList<A>::promoteHeapListToSet(CouponList& list) { |
| return HllSketchImplFactory<A>::promoteListToSet(list); |
| } |
| |
| template<typename A> |
| HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) { |
| return HllSketchImplFactory<A>::promoteListOrSetToHll(src); |
| } |
| |
| } |
| |
| #endif // _COUPONLIST_INTERNAL_HPP_ |