blob: 38c5412c8314995b1be5511061aa13c0755a36e1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef COMPACT_THETA_SKETCH_PARSER_IMPL_HPP_
#define COMPACT_THETA_SKETCH_PARSER_IMPL_HPP_
#include <iostream>
#include <iomanip>
namespace datasketches {
template<bool dummy>
auto compact_theta_sketch_parser<dummy>::parse(const void* ptr, size_t size, uint64_t seed) -> compact_theta_sketch_data {
if (size < 8) throw std::invalid_argument("at least 8 bytes expected, actual " + std::to_string(size)
+ ", sketch dump: " + hex_dump(reinterpret_cast<const uint8_t*>(ptr), size));
checker<true>::check_serial_version(reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_SERIAL_VERSION_BYTE], COMPACT_SKETCH_SERIAL_VERSION);
checker<true>::check_sketch_type(reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_TYPE_BYTE], COMPACT_SKETCH_TYPE);
uint64_t theta = theta_constants::MAX_THETA;
const uint16_t seed_hash = reinterpret_cast<const uint16_t*>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
if (reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & (1 << COMPACT_SKETCH_IS_EMPTY_FLAG)) {
return {true, true, seed_hash, 0, theta, nullptr};
}
const bool has_theta = reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] > 2;
if (has_theta) {
if (size < 16) throw std::invalid_argument("at least 16 bytes expected, actual " + std::to_string(size));
theta = reinterpret_cast<const uint64_t*>(ptr)[COMPACT_SKETCH_THETA_U64];
}
if (reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] == 1) {
return {false, true, seed_hash, 1, theta, reinterpret_cast<const uint64_t*>(ptr) + COMPACT_SKETCH_SINGLE_ENTRY_U64};
}
const uint32_t num_entries = reinterpret_cast<const uint32_t*>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
const size_t entries_start_u64 = has_theta ? COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 : COMPACT_SKETCH_ENTRIES_EXACT_U64;
const uint64_t* entries = reinterpret_cast<const uint64_t*>(ptr) + entries_start_u64;
const size_t expected_size_bytes = (entries_start_u64 + num_entries) * sizeof(uint64_t);
if (size < expected_size_bytes)
throw std::invalid_argument(std::to_string(expected_size_bytes)
+ " bytes expected, actual " + std::to_string(size) + ", sketch dump: " + hex_dump(reinterpret_cast<const uint8_t*>(ptr), size));
const bool is_ordered = reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & (1 << COMPACT_SKETCH_IS_ORDERED_FLAG);
return {false, is_ordered, seed_hash, num_entries, theta, entries};
}
template<bool dummy>
std::string compact_theta_sketch_parser<dummy>::hex_dump(const uint8_t* ptr, size_t size) {
std::stringstream s;
s << std::hex << std::setfill('0') << std::uppercase;
for (size_t i = 0; i < size; ++i) s << std::setw(2) << (ptr[i] & 0xff);
return s.str();
}
} /* namespace datasketches */
#endif