cpc/include/u32_table_impl.hpp - datasketches-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 // author Kevin Lang, Oath Research

 #ifndef U32_TABLE_IMPL_HPP_
 #define U32_TABLE_IMPL_HPP_

 #include <stdexcept>
 #include <algorithm>
 #include <climits>

 namespace datasketches {

 template<typename A>
 u32_table<A>::u32_table():
 lg_size(0),
 num_valid_bits(0),
 num_items(0),
 slots()
 {}

 template<typename A>
 u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
 slots(1 << lg_size, UINT32_MAX)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
 }

 template<typename A>
 size_t u32_table<A>::get_num_items() const {
   return num_items;
 }

 template<typename A>
 const uint32_t* u32_table<A>::get_slots() const {
   return slots.data();
 }

 template<typename A>
 uint8_t u32_table<A>::get_lg_size() const {
   return lg_size;
 }

 template<typename A>
 void u32_table<A>::clear() {
   std::fill(slots.begin(), slots.end(), UINT32_MAX);
   num_items = 0;
 }

 template<typename A>
 bool u32_table<A>::maybe_insert(uint32_t item) {
   const size_t index = lookup(item);
   if (slots[index] == item) return false;
   if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
   slots[index] = item;
   num_items++;
   if (U32_TABLE_UPSIZE_DENOM * num_items > U32_TABLE_UPSIZE_NUMER * (1 << lg_size)) {
     rebuild(lg_size + 1);
   }
   return true;
 }

 template<typename A>
 bool u32_table<A>::maybe_delete(uint32_t item) {
   const size_t index = lookup(item);
   if (slots[index] == UINT32_MAX) return false;
   if (slots[index] != item) throw std::logic_error("item does not exist");
   if (num_items == 0) throw std::logic_error("delete error");
   // delete the item
   slots[index] = UINT32_MAX;
   num_items--;

   // re-insert all items between the freed slot and the next empty slot
   const size_t mask = (1 << lg_size) - 1;
   size_t probe = (index + 1) & mask;
   uint32_t fetched = slots[probe];
   while (fetched != UINT32_MAX) {
     slots[probe] = UINT32_MAX;
     must_insert(fetched);
     probe = (probe + 1) & mask;
     fetched = slots[probe];
   }
   // shrink if necessary
   if (U32_TABLE_DOWNSIZE_DENOM * num_items < U32_TABLE_DOWNSIZE_NUMER * (1 << lg_size) && lg_size > 2) {
     rebuild(lg_size - 1);
   }
   return true;
 }

 // this one is specifically tailored to be a part of fm85 decompression scheme
 template<typename A>
 u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
   u32_table<A> table(lg_num_slots, 6 + lg_k);
   // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
   // However, we are starting out with the correct final table size, so the problem might not occur
   for (size_t i = 0; i < num_pairs; i++) {
     table.must_insert(pairs[i]);
   }
   table.num_items = num_pairs;
   return table;
 }

 template<typename A>
 size_t u32_table<A>::lookup(uint32_t item) const {
   const size_t size = 1 << lg_size;
   const size_t mask = size - 1;
   const uint8_t shift = num_valid_bits - lg_size;
   size_t probe = item >> shift;
   if (probe > mask) throw std::logic_error("probe out of range");
   while (slots[probe] != item && slots[probe] != UINT32_MAX) {
     probe = (probe + 1) & mask;
   }
   return probe;
 }

 // counts and resizing must be handled by the caller
 template<typename A>
 void u32_table<A>::must_insert(uint32_t item) {
   const size_t index = lookup(item);
   if (slots[index] == item) throw std::logic_error("item exists");
   if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
   slots[index] = item;
 }

 template<typename A>
 void u32_table<A>::rebuild(uint8_t new_lg_size) {
   if (new_lg_size < 2) throw std::logic_error("lg_size must be >= 2");
   const size_t old_size = 1 << lg_size;
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
   slots = vector_u32<A>(new_size, UINT32_MAX);
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
       must_insert(old_slots[i]);
     }
   }
 }

 // While extracting the items from a linear probing hashtable,
 // this will usually undo the wrap-around provided that the table
 // isn't too full. Experiments suggest that for sufficiently large tables
 // the load factor would have to be over 90 percent before this would fail frequently,
 // and even then the subsequent sort would fix things up.
 // The result is nearly sorted, so make sure to use an efficient sort for that case
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
   if (num_items == 0) return vector_u32<A>();
   const size_t table_size = 1 << lg_size;
   vector_u32<A> result(num_items);
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;

   // special rules for the region before the first empty slot
   uint32_t hi_bit = 1 << (num_valid_bits - 1);
   while (i < table_size && slots[i] != UINT32_MAX) {
     const uint32_t item = slots[i++];
     if (item & hi_bit) { result[r--] = item; } // this item was probably wrapped, so move to end
     else               { result[l++] = item; }
   }

   // the rest of the table is processed normally
   while (i < table_size) {
     const uint32_t item = slots[i++];
     if (item != UINT32_MAX) result[l++] = item;
   }
   if (l != r + 1) throw std::logic_error("unwrapping error");
   return result;
 }

 // This merge is safe to use in carefully designed overlapping scenarios.
 template<typename A>
 void u32_table<A>::merge(
   const uint32_t* arr_a, size_t start_a, size_t length_a, // input
   const uint32_t* arr_b, size_t start_b, size_t length_b, // input
   uint32_t* arr_c, size_t start_c // output
 ) {
   const size_t length_c = length_a + length_b;
   const size_t lim_a = start_a + length_a;
   const size_t lim_b = start_b + length_b;
   const size_t lim_c = start_c + length_c;
   size_t a = start_a;
   size_t b = start_b;
   size_t c = start_c;
   for ( ; c < lim_c ; c++) {
     if      (b >= lim_b)          { arr_c[c] = arr_a[a++]; }
     else if (a >= lim_a)          { arr_c[c] = arr_b[b++]; }
     else if (arr_a[a] < arr_b[b]) { arr_c[c] = arr_a[a++]; }
     else                          { arr_c[c] = arr_b[b++]; }
   }
   if (a != lim_a || b != lim_b) throw std::logic_error("merging error");
 }

 // In applications where the input array is already nearly sorted,
 // insertion sort runs in linear time with a very small constant.
 // This introspective version of insertion sort protects against
 // the quadratic cost of sorting bad input arrays.
 // It keeps track of how much work has been done, and if that exceeds a
 // constant times the array length, it switches to a different sorting algorithm.

 template<typename A>
 void u32_table<A>::introspective_insertion_sort(uint32_t* a, size_t l, size_t r) { // r points past the rightmost element
   const size_t length = r - l;
   const size_t cost_limit = 8 * length;
   size_t cost = 0;
   for (size_t i = l + 1; i < r; i++) {
     size_t j = i;
     uint32_t v = a[i];
     while (j >= l + 1 && v < a[j - 1]) {
       a[j] = a[j - 1];
       j--;
     }
     a[j] = v;
     cost += i - j; // distance moved is a measure of work
     if (cost > cost_limit) {
       knuth_shell_sort3(a, l, r);
       return;
     }
   }
 }

 template<typename A>
 void u32_table<A>::knuth_shell_sort3(uint32_t* a, size_t l, size_t r) {
   size_t h;
   for (h = 1; h < (r - l) / 9; h = 3 * h + 1);
   for ( ; h > 0; h /= 3) {
     for (size_t i = l + h; i < r; i++) {
       size_t j = i;
       const uint32_t v = a[i];
       while (j >= l + h && v < a[j - h]) {
         a[j] = a[j - h];
         j -= h;
       }
       a[j] = v;
     }
   }
 }

 } /* namespace datasketches */

 #endif
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	// author Kevin Lang, Oath Research

	#ifndef U32_TABLE_IMPL_HPP_
	#define U32_TABLE_IMPL_HPP_

	#include <stdexcept>
	#include <algorithm>
	#include <climits>

	namespace datasketches {

	template<typename A>
	u32_table<A>::u32_table():
	lg_size(0),
	num_valid_bits(0),
	num_items(0),
	slots()
	{}

	template<typename A>
	u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
	lg_size(lg_size),
	num_valid_bits(num_valid_bits),
	num_items(0),
	slots(1 << lg_size, UINT32_MAX)
	{
	if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
	if (num_valid_bits < 1 \|\| num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
	}

	template<typename A>
	size_t u32_table<A>::get_num_items() const {
	return num_items;
	}

	template<typename A>
	const uint32_t* u32_table<A>::get_slots() const {
	return slots.data();
	}

	template<typename A>
	uint8_t u32_table<A>::get_lg_size() const {
	return lg_size;
	}

	template<typename A>
	void u32_table<A>::clear() {
	std::fill(slots.begin(), slots.end(), UINT32_MAX);
	num_items = 0;
	}

	template<typename A>
	bool u32_table<A>::maybe_insert(uint32_t item) {
	const size_t index = lookup(item);
	if (slots[index] == item) return false;
	if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
	slots[index] = item;
	num_items++;
	if (U32_TABLE_UPSIZE_DENOM * num_items > U32_TABLE_UPSIZE_NUMER * (1 << lg_size)) {
	rebuild(lg_size + 1);
	}
	return true;
	}

	template<typename A>
	bool u32_table<A>::maybe_delete(uint32_t item) {
	const size_t index = lookup(item);
	if (slots[index] == UINT32_MAX) return false;
	if (slots[index] != item) throw std::logic_error("item does not exist");
	if (num_items == 0) throw std::logic_error("delete error");
	// delete the item
	slots[index] = UINT32_MAX;
	num_items--;

	// re-insert all items between the freed slot and the next empty slot
	const size_t mask = (1 << lg_size) - 1;
	size_t probe = (index + 1) & mask;
	uint32_t fetched = slots[probe];
	while (fetched != UINT32_MAX) {
	slots[probe] = UINT32_MAX;
	must_insert(fetched);
	probe = (probe + 1) & mask;
	fetched = slots[probe];
	}
	// shrink if necessary
	if (U32_TABLE_DOWNSIZE_DENOM * num_items < U32_TABLE_DOWNSIZE_NUMER * (1 << lg_size) && lg_size > 2) {
	rebuild(lg_size - 1);
	}
	return true;
	}

	// this one is specifically tailored to be a part of fm85 decompression scheme
	template<typename A>
	u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
	uint8_t lg_num_slots = 2;
	while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
	u32_table<A> table(lg_num_slots, 6 + lg_k);
	// Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
	// However, we are starting out with the correct final table size, so the problem might not occur
	for (size_t i = 0; i < num_pairs; i++) {
	table.must_insert(pairs[i]);
	}
	table.num_items = num_pairs;
	return table;
	}

	template<typename A>
	size_t u32_table<A>::lookup(uint32_t item) const {
	const size_t size = 1 << lg_size;
	const size_t mask = size - 1;
	const uint8_t shift = num_valid_bits - lg_size;
	size_t probe = item >> shift;
	if (probe > mask) throw std::logic_error("probe out of range");
	while (slots[probe] != item && slots[probe] != UINT32_MAX) {
	probe = (probe + 1) & mask;
	}
	return probe;
	}

	// counts and resizing must be handled by the caller
	template<typename A>
	void u32_table<A>::must_insert(uint32_t item) {
	const size_t index = lookup(item);
	if (slots[index] == item) throw std::logic_error("item exists");
	if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
	slots[index] = item;
	}

	template<typename A>
	void u32_table<A>::rebuild(uint8_t new_lg_size) {
	if (new_lg_size < 2) throw std::logic_error("lg_size must be >= 2");
	const size_t old_size = 1 << lg_size;
	const size_t new_size = 1 << new_lg_size;
	if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
	vector_u32<A> old_slots = std::move(slots);
	slots = vector_u32<A>(new_size, UINT32_MAX);
	lg_size = new_lg_size;
	for (size_t i = 0; i < old_size; i++) {
	if (old_slots[i] != UINT32_MAX) {
	must_insert(old_slots[i]);
	}
	}
	}

	// While extracting the items from a linear probing hashtable,
	// this will usually undo the wrap-around provided that the table
	// isn't too full. Experiments suggest that for sufficiently large tables
	// the load factor would have to be over 90 percent before this would fail frequently,
	// and even then the subsequent sort would fix things up.
	// The result is nearly sorted, so make sure to use an efficient sort for that case
	template<typename A>
	vector_u32<A> u32_table<A>::unwrapping_get_items() const {
	if (num_items == 0) return vector_u32<A>();
	const size_t table_size = 1 << lg_size;
	vector_u32<A> result(num_items);
	size_t i = 0;
	size_t l = 0;
	size_t r = num_items - 1;

	// special rules for the region before the first empty slot
	uint32_t hi_bit = 1 << (num_valid_bits - 1);
	while (i < table_size && slots[i] != UINT32_MAX) {
	const uint32_t item = slots[i++];
	if (item & hi_bit) { result[r--] = item; } // this item was probably wrapped, so move to end
	else { result[l++] = item; }
	}

	// the rest of the table is processed normally
	while (i < table_size) {
	const uint32_t item = slots[i++];
	if (item != UINT32_MAX) result[l++] = item;
	}
	if (l != r + 1) throw std::logic_error("unwrapping error");
	return result;
	}

	// This merge is safe to use in carefully designed overlapping scenarios.
	template<typename A>
	void u32_table<A>::merge(
	const uint32_t* arr_a, size_t start_a, size_t length_a, // input
	const uint32_t* arr_b, size_t start_b, size_t length_b, // input
	uint32_t* arr_c, size_t start_c // output
	) {
	const size_t length_c = length_a + length_b;
	const size_t lim_a = start_a + length_a;
	const size_t lim_b = start_b + length_b;
	const size_t lim_c = start_c + length_c;
	size_t a = start_a;
	size_t b = start_b;
	size_t c = start_c;
	for ( ; c < lim_c ; c++) {
	if (b >= lim_b) { arr_c[c] = arr_a[a++]; }
	else if (a >= lim_a) { arr_c[c] = arr_b[b++]; }
	else if (arr_a[a] < arr_b[b]) { arr_c[c] = arr_a[a++]; }
	else { arr_c[c] = arr_b[b++]; }
	}
	if (a != lim_a \|\| b != lim_b) throw std::logic_error("merging error");
	}

	// In applications where the input array is already nearly sorted,
	// insertion sort runs in linear time with a very small constant.
	// This introspective version of insertion sort protects against
	// the quadratic cost of sorting bad input arrays.
	// It keeps track of how much work has been done, and if that exceeds a
	// constant times the array length, it switches to a different sorting algorithm.

	template<typename A>
	void u32_table<A>::introspective_insertion_sort(uint32_t* a, size_t l, size_t r) { // r points past the rightmost element
	const size_t length = r - l;
	const size_t cost_limit = 8 * length;
	size_t cost = 0;
	for (size_t i = l + 1; i < r; i++) {
	size_t j = i;
	uint32_t v = a[i];
	while (j >= l + 1 && v < a[j - 1]) {
	a[j] = a[j - 1];
	j--;
	}
	a[j] = v;
	cost += i - j; // distance moved is a measure of work
	if (cost > cost_limit) {
	knuth_shell_sort3(a, l, r);
	return;
	}
	}
	}

	template<typename A>
	void u32_table<A>::knuth_shell_sort3(uint32_t* a, size_t l, size_t r) {
	size_t h;
	for (h = 1; h < (r - l) / 9; h = 3 * h + 1);
	for ( ; h > 0; h /= 3) {
	for (size_t i = l + h; i < r; i++) {
	size_t j = i;
	const uint32_t v = a[i];
	while (j >= l + h && v < a[j - h]) {
	a[j] = a[j - h];
	j -= h;
	}
	a[j] = v;
	}
	}
	}

	} /* namespace datasketches */

	#endif