Merge pull request #59 from apache/license_notice_disclaimer
readme, license, notice, disclaimer
diff --git a/cpc/src/fm85.cpp b/cpc/src/fm85.cpp
index e7544e4..1055fb1 100644
--- a/cpc/src/fm85.cpp
+++ b/cpc/src/fm85.cpp
@@ -61,20 +61,22 @@
// This is to support custom allocator and deallocator
void fm85InitAD (void* (*alloc)(size_t), void (*dealloc)(void*)) {
if (!fm85Initialized) {
- fm85Initialized = 1;
fm85alloc = alloc;
fm85free = dealloc;
fillByteLeadingZerosTable();
fillByteTrailingZerosTable();
makeTheDecodingTables();
- fillInvPow2Tab ();
- fillKxpByteLookup ();
+ fillInvPow2Tab();
+ fillKxpByteLookup();
+ fm85Initialized = 1;
}
}
void fm85Clean (void) {
- freeTheDecodingTables();
- fm85Initialized = 0;
+ if (fm85Initialized) {
+ freeTheDecodingTables();
+ fm85Initialized = 0;
+ }
}
/*******************************************************/
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index 4663af5..c7ff7a6 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -281,7 +281,7 @@
kll_sketch(uint16_t k, uint8_t flags_byte, const void* bytes, size_t size);
// common update code
- uint32_t internal_update(const T& value);
+ inline uint32_t internal_update(const T& value);
// The following code is only valid in the special case of exactly reaching capacity while updating.
// It cannot be used while merging, while reducing k, or anything else.
@@ -416,14 +416,14 @@
template<typename T, typename C, typename S, typename A>
void kll_sketch<T, C, S, A>::update(const T& value) {
- const uint32_t next_pos = internal_update(value);
- new (&items_[next_pos]) T(value);
+ const uint32_t index = internal_update(value);
+ new (&items_[index]) T(value);
}
template<typename T, typename C, typename S, typename A>
void kll_sketch<T, C, S, A>::update(T&& value) {
- const uint32_t next_pos = internal_update(value);
- new (&items_[next_pos]) T(std::move(value));
+ const uint32_t index = internal_update(value);
+ new (&items_[index]) T(std::move(value));
}
template<typename T, typename C, typename S, typename A>
@@ -438,9 +438,7 @@
if (levels_[0] == 0) compress_while_updating();
n_++;
is_level_zero_sorted_ = false;
- const uint32_t next_pos(levels_[0] - 1);
- levels_[0] = next_pos;
- return next_pos;
+ return --levels_[0];
}
template<typename T, typename C, typename S, typename A>
@@ -647,7 +645,8 @@
}
ptr += S().serialize(ptr, &items_[levels_[0]], get_num_retained());
}
- if (ptr != static_cast<char*>(data_ptr.get()) + size) throw std::logic_error("serialized size mismatch");
+ const size_t delta = ptr - static_cast<const char*>(data_ptr.get());
+ if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
return std::make_pair(std::move(data_ptr), size);
}
@@ -804,7 +803,8 @@
new (max_value_) T(items_[levels_[0]]);
}
is_level_zero_sorted_ = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
- if (ptr != static_cast<const char*>(bytes) + size) throw std::logic_error("deserialized size mismatch");
+ const size_t delta = ptr - static_cast<const char*>(bytes);
+ if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
}
// The following code is only valid in the special case of exactly reaching capacity while updating.
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index 8e12791..c14bb03 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -67,7 +67,9 @@
CPPUNIT_TEST(merge_min_value_from_other);
CPPUNIT_TEST(merge_min_and_max_from_other);
CPPUNIT_TEST(sketch_of_ints);
- CPPUNIT_TEST(sketch_of_strings);
+ CPPUNIT_TEST(sketch_of_strings_stream);
+ CPPUNIT_TEST(sketch_of_strings_bytes);
+ CPPUNIT_TEST(sketch_of_strings_single_item_bytes);
CPPUNIT_TEST(copy);
CPPUNIT_TEST_SUITE_END();
@@ -502,43 +504,83 @@
CPPUNIT_ASSERT_EQUAL(sketch.get_rank(n), sketch2.get_rank(n));
}
- void sketch_of_strings() {
- kll_string_sketch sketch;
- CPPUNIT_ASSERT_THROW(sketch.get_quantile(0), std::runtime_error);
- CPPUNIT_ASSERT_THROW(sketch.get_min_value(), std::runtime_error);
- CPPUNIT_ASSERT_THROW(sketch.get_max_value(), std::runtime_error);
- CPPUNIT_ASSERT_EQUAL(8u, sketch.get_serialized_size_bytes());
+ void sketch_of_strings_stream() {
+ kll_string_sketch sketch1;
+ CPPUNIT_ASSERT_THROW(sketch1.get_quantile(0), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_min_value(), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_max_value(), std::runtime_error);
+ CPPUNIT_ASSERT_EQUAL(8u, sketch1.get_serialized_size_bytes());
- const int n(1000);
- for (int i = 0; i < n; i++) sketch.update(std::to_string(i));
+ const int n = 1000;
+ for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
- CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch.get_min_value());
- CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch.get_max_value());
+ CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch1.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch1.get_max_value());
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
- sketch.serialize(s);
- CPPUNIT_ASSERT_EQUAL(sketch.get_serialized_size_bytes(), (uint32_t) s.tellp());
+ sketch1.serialize(s);
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_serialized_size_bytes(), (uint32_t) s.tellp());
auto sketch2 = kll_string_sketch::deserialize(s);
CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) s.tellg());
CPPUNIT_ASSERT_EQUAL(s.tellp(), s.tellg());
- CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch2.is_empty());
- CPPUNIT_ASSERT_EQUAL(sketch.is_estimation_mode(), sketch2.is_estimation_mode());
- CPPUNIT_ASSERT_EQUAL(sketch.get_n(), sketch2.get_n());
- CPPUNIT_ASSERT_EQUAL(sketch.get_num_retained(), sketch2.get_num_retained());
- CPPUNIT_ASSERT_EQUAL(sketch.get_min_value(), sketch2.get_min_value());
- CPPUNIT_ASSERT_EQUAL(sketch.get_max_value(), sketch2.get_max_value());
- CPPUNIT_ASSERT_EQUAL(sketch.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
- CPPUNIT_ASSERT_EQUAL(sketch.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
- CPPUNIT_ASSERT_EQUAL(sketch.get_quantile(0.5), sketch2.get_quantile(0.5));
- CPPUNIT_ASSERT_EQUAL(sketch.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
- CPPUNIT_ASSERT_EQUAL(sketch.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_empty(), sketch2.is_empty());
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_estimation_mode(), sketch2.is_estimation_mode());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_n(), sketch2.get_n());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_num_retained(), sketch2.get_num_retained());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_min_value(), sketch2.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_max_value(), sketch2.get_max_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_quantile(0.5), sketch2.get_quantile(0.5));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
// to take a look using hexdump
- std::ofstream os("kll-string.bin");
- sketch.serialize(os);
+ //std::ofstream os("kll-string.bin");
+ //sketch1.serialize(os);
// debug print
- //sketch.to_stream(std::cout);
+ //sketch1.to_stream(std::cout);
+ }
+
+ void sketch_of_strings_bytes() {
+ kll_string_sketch sketch1;
+ CPPUNIT_ASSERT_THROW(sketch1.get_quantile(0), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_min_value(), std::runtime_error);
+ CPPUNIT_ASSERT_THROW(sketch1.get_max_value(), std::runtime_error);
+ CPPUNIT_ASSERT_EQUAL(8u, sketch1.get_serialized_size_bytes());
+
+ const int n = 1000;
+ for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
+
+ CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch1.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch1.get_max_value());
+
+ auto data = sketch1.serialize();
+ CPPUNIT_ASSERT_EQUAL((size_t) sketch1.get_serialized_size_bytes(), data.second);
+ auto sketch2 = kll_string_sketch::deserialize(data.first.get(), data.second);
+ CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) data.second);
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_empty(), sketch2.is_empty());
+ CPPUNIT_ASSERT_EQUAL(sketch1.is_estimation_mode(), sketch2.is_estimation_mode());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_n(), sketch2.get_n());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_num_retained(), sketch2.get_num_retained());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_min_value(), sketch2.get_min_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_max_value(), sketch2.get_max_value());
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_quantile(0.5), sketch2.get_quantile(0.5));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
+ CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
+ }
+
+
+ void sketch_of_strings_single_item_bytes() {
+ kll_string_sketch sketch1;
+ sketch1.update("a");
+ auto data = sketch1.serialize();
+ CPPUNIT_ASSERT_EQUAL((size_t) sketch1.get_serialized_size_bytes(), data.second);
+ auto sketch2 = kll_string_sketch::deserialize(data.first.get(), data.second);
+ CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) data.second);
}
void copy() {