| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #include <catch2/catch.hpp> |
| #include <fstream> |
| #include <vector> |
| |
| #include "array_of_strings_sketch.hpp" |
| |
| namespace datasketches { |
| // assume the binary sketches for this test have been generated by datasketches-java code |
| // in the subdirectory called "java" in the root directory of this project |
| static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; |
| |
// Reads the whole file at `path` into a byte vector.
// The stream has failbit and badbit exceptions enabled, so a failure to open,
// seek or read surfaces as an exception instead of a short result.
// An empty file yields an empty vector.
static std::vector<uint8_t> read_binary_file(const std::string& path) {
  std::ifstream input;
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);

  // find the length by jumping to the end, then rewind before reading
  input.seekg(0, std::ios::end);
  const auto num_bytes = static_cast<size_t>(input.tellg());
  input.seekg(0, std::ios::beg);

  std::vector<uint8_t> contents(num_bytes);
  if (num_bytes == 0) return contents; // nothing to read
  input.read(reinterpret_cast<char*>(contents.data()), num_bytes);
  return contents;
}
| |
| TEST_CASE("aos sketch one value", "[serde_compat]") { |
| const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; |
| for (const unsigned n: n_arr) { |
| const auto path = testBinaryInputPath + "aos_1_n" + std::to_string(n) + "_java.sk"; |
| SECTION("stream") { |
| std::ifstream is; |
| is.exceptions(std::ios::failbit | std::ios::badbit); |
| is.open(path, std::ios::binary); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| is, DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE(sketch.is_empty() == (n == 0)); |
| REQUIRE(sketch.is_estimation_mode() == (n > 1000)); |
| REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 1); |
| } |
| } |
| SECTION("bytes") { |
| const auto bytes = read_binary_file(path); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| bytes.data(), bytes.size(), DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE(sketch.is_empty() == (n == 0)); |
| REQUIRE(sketch.is_estimation_mode() == (n > 1000)); |
| REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 1); |
| } |
| } |
| } |
| } |
| |
| TEST_CASE("aos sketch three values", "[serde_compat]") { |
| const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; |
| for (const unsigned n: n_arr) { |
| const auto path = testBinaryInputPath + "aos_3_n" + std::to_string(n) + "_java.sk"; |
| SECTION("stream") { |
| std::ifstream is; |
| is.exceptions(std::ios::failbit | std::ios::badbit); |
| is.open(path, std::ios::binary); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| is, DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE(sketch.is_empty() == (n == 0)); |
| REQUIRE(sketch.is_estimation_mode() == (n > 1000)); |
| REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 3); |
| } |
| } |
| SECTION("bytes") { |
| const auto bytes = read_binary_file(path); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| bytes.data(), bytes.size(), DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE(sketch.is_empty() == (n == 0)); |
| REQUIRE(sketch.is_estimation_mode() == (n > 1000)); |
| REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 3); |
| } |
| } |
| } |
| } |
| |
| TEST_CASE("aos sketch non-empty no entries", "[serde_compat]") { |
| const auto path = testBinaryInputPath + "aos_1_non_empty_no_entries_java.sk"; |
| SECTION("stream") { |
| std::ifstream is; |
| is.exceptions(std::ios::failbit | std::ios::badbit); |
| is.open(path, std::ios::binary); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| is, DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE_FALSE(sketch.is_empty()); |
| REQUIRE(sketch.get_num_retained() == 0); |
| } |
| SECTION("bytes") { |
| const auto bytes = read_binary_file(path); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| bytes.data(), bytes.size(), DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE_FALSE(sketch.is_empty()); |
| REQUIRE(sketch.get_num_retained() == 0); |
| } |
| } |
| |
| TEST_CASE("aos sketch multi keys strings", "[serde_compat]") { |
| const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; |
| for (const unsigned n: n_arr) { |
| const auto path = testBinaryInputPath + "aos_multikey_n" + std::to_string(n) + "_java.sk"; |
| SECTION("stream") { |
| std::ifstream is; |
| is.exceptions(std::ios::failbit | std::ios::badbit); |
| is.open(path, std::ios::binary); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| is, DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE(sketch.is_empty() == (n == 0)); |
| REQUIRE(sketch.is_estimation_mode() == (n > 1000)); |
| REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 1); |
| } |
| } |
| SECTION("bytes") { |
| const auto bytes = read_binary_file(path); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| bytes.data(), bytes.size(), DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| REQUIRE(sketch.is_empty() == (n == 0)); |
| REQUIRE(sketch.is_estimation_mode() == (n > 1000)); |
| REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 1); |
| } |
| } |
| } |
| } |
| |
| TEST_CASE("aos sketch unicode strings", "[serde_compat]") { |
| const auto path = testBinaryInputPath + "aos_unicode_java.sk"; |
| auto check = [](const compact_array_of_strings_tuple_sketch<>& sketch) { |
| REQUIRE_FALSE(sketch.is_empty()); |
| REQUIRE_FALSE(sketch.is_estimation_mode()); |
| REQUIRE(sketch.get_num_retained() == 3); |
| |
| const std::vector<std::vector<std::string>> expected_values = { |
| {"밸류", "값"}, |
| {"📦", "🎁"}, |
| {"ценить1", "ценить2"} |
| }; |
| std::vector<bool> matched(expected_values.size(), false); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| REQUIRE(entry.second.size() == 2); |
| |
| bool found = false; |
| for (size_t i = 0; i < expected_values.size(); ++i) { |
| if (matched[i]) continue; |
| const auto& expected = expected_values[i]; |
| if (entry.second.size() != expected.size()) continue; |
| bool equal = true; |
| for (size_t j = 0; j < expected.size(); ++j) { |
| if (entry.second[j] != expected[j]) { |
| equal = false; |
| break; |
| } |
| } |
| if (equal) { |
| matched[i] = true; |
| found = true; |
| break; |
| } |
| } |
| REQUIRE(found); |
| } |
| for (bool found: matched) REQUIRE(found); |
| }; |
| SECTION("stream") { |
| std::ifstream is; |
| is.exceptions(std::ios::failbit | std::ios::badbit); |
| is.open(path, std::ios::binary); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| is, DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| check(sketch); |
| } |
| SECTION("bytes") { |
| const auto bytes = read_binary_file(path); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| bytes.data(), bytes.size(), DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| check(sketch); |
| } |
| } |
| |
| TEST_CASE("aos sketch empty strings", "[serde_compat]") { |
| const auto path = testBinaryInputPath + "aos_empty_strings_java.sk"; |
| auto check = [](const compact_array_of_strings_tuple_sketch<>& sketch) { |
| REQUIRE_FALSE(sketch.is_empty()); |
| REQUIRE_FALSE(sketch.is_estimation_mode()); |
| REQUIRE(sketch.get_num_retained() == 3); |
| const std::vector<std::vector<std::string>> expected_values = { |
| {"empty_key_value"}, |
| {""}, |
| {"", ""} |
| }; |
| std::vector<bool> matched(expected_values.size(), false); |
| for (const auto& entry: sketch) { |
| REQUIRE(entry.first < sketch.get_theta64()); |
| |
| bool found = false; |
| for (size_t i = 0; i < expected_values.size(); ++i) { |
| if (matched[i]) continue; |
| const auto& expected = expected_values[i]; |
| if (entry.second.size() != expected.size()) continue; |
| bool equal = true; |
| for (size_t j = 0; j < expected.size(); ++j) { |
| if (entry.second[j] != expected[j]) { |
| equal = false; |
| break; |
| } |
| } |
| if (equal) { |
| matched[i] = true; |
| found = true; |
| break; |
| } |
| } |
| REQUIRE(found); |
| } |
| for (bool found: matched) REQUIRE(found); |
| }; |
| SECTION("stream") { |
| std::ifstream is; |
| is.exceptions(std::ios::failbit | std::ios::badbit); |
| is.open(path, std::ios::binary); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| is, DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| check(sketch); |
| } |
| SECTION("bytes") { |
| const auto bytes = read_binary_file(path); |
| const auto sketch = compact_array_of_strings_tuple_sketch<>::deserialize( |
| bytes.data(), bytes.size(), DEFAULT_SEED, default_array_of_strings_serde<>() |
| ); |
| check(sketch); |
| } |
| } |
| } |