// blob: 06c428ac9640455ef2a462670fdd1732e61f1f3f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <nanobind/nanobind.h>
#include <nanobind/stl/string.h>
#include "cpc_sketch.hpp"
#include "cpc_union.hpp"
#include "cpc_common.hpp"
#include "common_defs.hpp"
namespace nb = nanobind;
// Registers the Python bindings for the CPC sketch (`cpc_sketch`) and the
// CPC union (`cpc_union`) on the given nanobind module.
void init_cpc(nb::module_ &m) {
  using namespace datasketches;

  nb::class_<cpc_sketch>(m, "cpc_sketch")
    .def(nb::init<uint8_t, uint64_t>(), nb::arg("lg_k")=cpc_constants::DEFAULT_LG_K, nb::arg("seed")=DEFAULT_SEED,
         "Creates a new CPC sketch\n\n"
         ":param lg_k: base 2 logarithm of the number of bins in the sketch\n"
         ":type lg_k: int, optional\n"
         ":param seed: seed value for the hash function\n"
         ":type seed: int, optional"
    )
    .def("__copy__", [](const cpc_sketch& sk) { return cpc_sketch(sk); },
         "Returns a copy of the sketch")
    .def("__str__", [](const cpc_sketch& sk) { return sk.to_string(); },
         "Produces a string summary of the sketch")
    .def("to_string", &cpc_sketch::to_string,
         "Produces a string summary of the sketch")
    // The signed overload is registered first so that negative Python ints are
    // handled as integers; with only the unsigned overload bound they cannot be
    // loaded as uint64_t and end up in the floating point overload instead.
    .def("update", static_cast<void (cpc_sketch::*)(int64_t)>(&cpc_sketch::update), nb::arg("datum"),
         "Updates the sketch with the given 64-bit integer value")
    // The unsigned overload keeps accepting values above the signed 64-bit range.
    .def("update", static_cast<void (cpc_sketch::*)(uint64_t)>(&cpc_sketch::update), nb::arg("datum"),
         "Updates the sketch with the given 64-bit integer value")
    .def("update", static_cast<void (cpc_sketch::*)(double)>(&cpc_sketch::update), nb::arg("datum"),
         "Updates the sketch with the given 64-bit floating point")
    .def("update", static_cast<void (cpc_sketch::*)(const std::string&)>(&cpc_sketch::update), nb::arg("datum"),
         "Updates the sketch with the given string")
    .def_prop_ro("lg_k", &cpc_sketch::get_lg_k,
         "Configured lg_k of this sketch")
    .def("is_empty", &cpc_sketch::is_empty,
         "Returns True if the sketch is empty, otherwise False")
    .def("get_estimate", &cpc_sketch::get_estimate,
         "Estimate of the distinct count of the input stream")
    .def("get_lower_bound", &cpc_sketch::get_lower_bound, nb::arg("kappa"),
         "Returns an approximate lower bound on the estimate for kappa values in {1, 2, 3}, roughly corresponding to standard deviations")
    .def("get_upper_bound", &cpc_sketch::get_upper_bound, nb::arg("kappa"),
         "Returns an approximate upper bound on the estimate for kappa values in {1, 2, 3}, roughly corresponding to standard deviations")
    .def(
        "serialize",
        [](const cpc_sketch& sk) {
          // Copy the serialized image into a Python bytes object.
          auto bytes = sk.serialize();
          return nb::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
        },
        "Serializes the sketch into a bytes object"
    )
    .def_static(
        "deserialize",
        // The seed is forwarded so that sketches created with a non-default
        // seed can be read back; it defaults to DEFAULT_SEED like the constructor.
        [](const nb::bytes& bytes, uint64_t seed) {
          return cpc_sketch::deserialize(bytes.c_str(), bytes.size(), seed);
        },
        nb::arg("bytes"), nb::arg("seed")=DEFAULT_SEED,
        "Reads a bytes object and returns the corresponding cpc_sketch\n\n"
        ":param bytes: serialized image of a CPC sketch\n"
        ":type bytes: bytes\n"
        ":param seed: seed value for the hash function, must match the seed the sketch was created with\n"
        ":type seed: int, optional"
    );

  nb::class_<cpc_union>(m, "cpc_union")
    .def(nb::init<uint8_t, uint64_t>(), nb::arg("lg_k")=cpc_constants::DEFAULT_LG_K, nb::arg("seed")=DEFAULT_SEED,
         "Creates a new CPC union\n\n"
         ":param lg_k: base 2 logarithm of the number of bins in the union\n"
         ":type lg_k: int, optional\n"
         ":param seed: seed value for the hash function\n"
         ":type seed: int, optional"
    )
    .def("update", static_cast<void (cpc_union::*)(const cpc_sketch&)>(&cpc_union::update), nb::arg("sketch"),
         "Updates the union with the provided CPC sketch")
    .def("get_result", &cpc_union::get_result,
         "Returns a CPC sketch with the result of the union")
    ;
}