blob: ef0eb671a5098209f7b2b21864ebaf3ec8f4eea1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <nanobind/nanobind.h>
#include <nanobind/make_iterator.h>
#include <nanobind/stl/function.h>
#include <nanobind/stl/string.h>
#include "py_serde.hpp"
#include "py_object_ostream.hpp"
#include "var_opt_sketch.hpp"
#include "var_opt_union.hpp"
namespace nb = nanobind;
template<typename T>
void bind_vo_sketch(nb::module_ &m, const char* name) {
using namespace datasketches;
nb::class_<var_opt_sketch<T>>(m, name)
.def(nb::init<uint32_t>(), nb::arg("k"),
"Creates a new Var Opt sketch instance\n\n"
":param k: Maximum number of samples in the sketch\n:type k: int\n"
)
.def("__copy__", [](const var_opt_sketch<T>& sk){ return var_opt_sketch<T>(sk); })
.def("__str__", [](const var_opt_sketch<T>& sk) { return sk.to_string(); },
"Produces a string summary of the sketch")
.def("to_string",
[](const var_opt_sketch<T>& sk, bool print_items) {
std::ostringstream ss;
ss << sk.to_string();
if (print_items)
ss << sk.items_to_string();
return ss.str();
}, nb::arg("print_items")=false,
"Produces a string summary of the sketch and optionally prints the items")
.def("update", (void (var_opt_sketch<T>::*)(const T&, double)) &var_opt_sketch<T>::update, nb::arg("item"), nb::arg("weight")=1.0,
"Updates the sketch with the given value and weight")
.def_prop_ro("k", &var_opt_sketch<T>::get_k,
"Returns the sketch's maximum configured sample size")
.def_prop_ro("n", &var_opt_sketch<T>::get_n,
"Returns the total stream length")
.def_prop_ro("num_samples", &var_opt_sketch<T>::get_num_samples,
"Returns the number of samples currently in the sketch")
.def("is_empty", &var_opt_sketch<T>::is_empty,
"Returns True if the sketch is empty, otherwise False")
.def("estimate_subset_sum",
[](const var_opt_sketch<T>& sk, const std::function<bool(T)> func) {
subset_summary summary = sk.estimate_subset_sum(func);
nb::dict d;
d["estimate"] = summary.estimate;
d["lower_bound"] = summary.lower_bound;
d["upper_bound"] = summary.upper_bound;
d["total_sketch_weight"] = summary.total_sketch_weight;
return d;
}, nb::arg("predicate"),
"Applies a provided predicate to the sketch and returns the estimated total weight matching the predicate, as well "
"as upper and lower bounds on the estimate and the total weight processed by the sketch")
.def("get_serialized_size_bytes",
[](const var_opt_sketch<T>& sk, py_object_serde& serde) { return sk.get_serialized_size_bytes(serde); },
nb::arg("serde"),
"Computes the size in bytes needed to serialize the current sketch")
.def("serialize",
[](const var_opt_sketch<T>& sk, py_object_serde& serde) {
auto bytes = sk.serialize(0, serde);
return nb::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
}, nb::arg("serde"),
"Serializes the sketch into a bytes object")
.def_static(
"deserialize",
[](const nb::bytes& bytes, py_object_serde& serde) { return var_opt_sketch<T>::deserialize(bytes.c_str(), bytes.size(), serde); },
nb::arg("bytes"), nb::arg("serde"),
"Reads a bytes object and returns the corresponding var opt sketch")
.def("__iter__",
[](const var_opt_sketch<T>& sk) {
return nb::make_iterator(nb::type<var_opt_sketch<T>>(),
"var_opt_iterator",
sk.begin(),
sk.end());
}, nb::keep_alive<0,1>()
)
;
}
template<typename T>
void bind_vo_union(nb::module_ &m, const char* name) {
using namespace datasketches;
nb::class_<var_opt_union<T>>(m, name)
.def(nb::init<uint32_t>(), nb::arg("max_k"))
.def("__str__", [](const var_opt_union<T>& sk) { return sk.to_string(); },
"Produces a string summary of the sketch")
.def("to_string", &var_opt_union<T>::to_string,
"Produces a string summary of the sketch")
.def("update", (void (var_opt_union<T>::*)(const var_opt_sketch<T>& sk)) &var_opt_union<T>::update, nb::arg("sketch"),
"Updates the union with the given sketch")
.def("get_result", &var_opt_union<T>::get_result,
"Returns a sketch corresponding to the union result")
.def("reset", &var_opt_union<T>::reset,
"Resets the union to the empty state")
.def("get_serialized_size_bytes",
[](const var_opt_union<T>& u, py_object_serde& serde) { return u.get_serialized_size_bytes(serde); },
nb::arg("serde"),
"Computes the size in bytes needed to serialize the current union")
.def("serialize",
[](const var_opt_union<T>& u, py_object_serde& serde) {
auto bytes = u.serialize(0, serde);
return nb::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
}, nb::arg("serde"),
"Serializes the union into a bytes object with the provided serde")
.def_static(
"deserialize",
[](const nb::bytes& bytes, py_object_serde& serde) { return var_opt_union<T>::deserialize(bytes.c_str(), bytes.size(), serde); },
nb::arg("bytes"), nb::arg("serde"),
"Constructs a var opt union from the given bytes using the provided serde")
;
}
void init_vo(nb::module_ &m) {
bind_vo_sketch<nb::object>(m, "var_opt_sketch");
bind_vo_union<nb::object>(m, "var_opt_union");
}