blob: 4857f51b7d32f1acb681e9b29b50c5058a334f7e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef KLL_HELPER_HPP_
#define KLL_HELPER_HPP_
#include <random>
#include <stdexcept>
#include <chrono>
namespace datasketches {
static std::independent_bits_engine<std::mt19937, 1, uint32_t> random_bit(std::chrono::system_clock::now().time_since_epoch().count());
#ifdef KLL_VALIDATION
extern uint32_t kll_next_offset;
#endif
// 0 <= power <= 30
static const uint64_t powers_of_three[] = {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441,
1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467,
3486784401, 10460353203, 31381059609, 94143178827, 282429536481,
847288609443, 2541865828329, 7625597484987, 22876792454961, 68630377364883,
205891132094649};
class kll_helper {
public:
static inline bool is_even(uint32_t value);
static inline bool is_odd(uint32_t value);
static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
static inline uint8_t ub_on_num_levels(uint64_t n);
static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
static inline uint32_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
static inline uint32_t int_cap_aux(uint16_t k, uint8_t depth);
static inline uint32_t int_cap_aux_aux(uint16_t k, uint8_t depth);
static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
/*
* This version is for floating point types
* Checks the sequential validity of the given array of values.
* They must be unique, monotonically increasing and not NaN.
*/
template <typename T, typename C>
static typename std::enable_if<std::is_floating_point<T>::value, void>::type
validate_values(const T* values, uint32_t size) {
for (uint32_t i = 0; i < size ; i++) {
if (std::isnan(values[i])) {
throw std::invalid_argument("Values must not be NaN");
}
if ((i < (size - 1)) && !(C()(values[i], values[i + 1]))) {
throw std::invalid_argument("Values must be unique and monotonically increasing");
}
}
}
/*
* This version is for non-floating point types
* Checks the sequential validity of the given array of values.
* They must be unique and monotonically increasing.
*/
template <typename T, typename C>
static typename std::enable_if<!std::is_floating_point<T>::value, void>::type
validate_values(const T* values, uint32_t size) {
for (uint32_t i = 0; i < size ; i++) {
if ((i < (size - 1)) && !(C()(values[i], values[i + 1]))) {
throw std::invalid_argument("Values must be unique and monotonically increasing");
}
}
}
template <typename T>
static void randomly_halve_down(T* buf, uint32_t start, uint32_t length);
template <typename T>
static void randomly_halve_up(T* buf, uint32_t start, uint32_t length);
// this version moves objects within the same buffer
// assumes that destination has initialized objects
// does not destroy the originals after the move
template <typename T, typename C>
static void merge_sorted_arrays(T* buf, uint32_t start_a, uint32_t len_a, uint32_t start_b, uint32_t len_b, uint32_t start_c);
// this version is to merge from two different buffers into a third buffer
// initializes objects is the destination buffer
// moves objects from buf_a and destroys the originals
// copies objects from buf_b
template <typename T, typename C>
static void merge_sorted_arrays(const T* buf_a, uint32_t start_a, uint32_t len_a, const T* buf_b, uint32_t start_b, uint32_t len_b, T* buf_c, uint32_t start_c);
struct compress_result {
uint8_t final_num_levels;
uint32_t final_capacity;
uint32_t final_num_items;
};
/*
* Here is what we do for each level:
* If it does not need to be compacted, then simply copy it over.
*
* Otherwise, it does need to be compacted, so...
* Copy zero or one guy over.
* If the level above is empty, halve up.
* Else the level above is nonempty, so...
* halve down, then merge up.
* Adjust the boundaries of the level above.
*
* It can be proved that general_compress returns a sketch that satisfies the space constraints
* no matter how much data is passed in.
* All levels except for level zero must be sorted before calling this, and will still be
* sorted afterwards.
* Level zero is not required to be sorted before, and may not be sorted afterwards.
*/
template <typename T, typename C>
static compress_result general_compress(uint16_t k, uint8_t m, uint8_t num_levels_in, T* items,
uint32_t* in_levels, uint32_t* out_levels, bool is_level_zero_sorted);
template<typename T>
static void copy_construct(const T* src, size_t src_first, size_t src_last, T* dst, size_t dst_first);
template<typename T>
static void move_construct(T* src, size_t src_first, size_t src_last, T* dst, size_t dst_first, bool destroy);
#ifdef KLL_VALIDATION
private:
static inline uint32_t deterministic_offset();
#endif
};
} /* namespace datasketches */
#include "kll_helper_impl.hpp"
#endif // KLL_HELPER_HPP_