blob: 3ea8b6b94754c2023ccaf7d5842acb5365e4b472 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "ebpps_sketch.hpp"
#include "py_serde.hpp"
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <sstream>
#include <string>
namespace py = pybind11;
namespace datasketches {
namespace python {
/**
 * Rebuilds an ebpps_sketch<T> from a Python bytes object.
 *
 * @param skBytes Python bytes holding a serialized sketch.
 * @param sd      serde handed to ebpps_sketch<T>::deserialize() to decode the items.
 * @return the sketch produced by ebpps_sketch<T>::deserialize().
 */
template<typename T>
ebpps_sketch<T> ebpps_sketch_deserialize(py::bytes& skBytes, py_object_serde& sd) {
  // py::bytes converts implicitly to std::string; the resulting string owns
  // the buffer for the duration of the deserialize() call.
  const std::string buffer = skBytes;
  const char* const raw = buffer.data();
  return ebpps_sketch<T>::deserialize(raw, buffer.size(), sd);
}
/**
 * Serializes a sketch into a Python bytes object.
 *
 * @param sk sketch to serialize.
 * @param sd serde handed to ebpps_sketch<T>::serialize() to encode the items.
 * @return a py::bytes (returned as py::object) containing the serialized image.
 */
template<typename T>
py::object ebpps_sketch_serialize(const ebpps_sketch<T>& sk, py_object_serde& sd) {
  // The literal 0 is forwarded as serialize()'s first argument
  // (presumably a header-size offset -- confirm against ebpps_sketch.hpp).
  const auto payload = sk.serialize(0, sd);
  // py::bytes wants a char buffer; reinterpret the serialized bytes in place.
  const char* const raw = reinterpret_cast<const char*>(payload.data());
  return py::bytes(raw, payload.size());
}
/**
 * Builds a human-readable description of the sketch.
 *
 * @param sk          sketch to describe.
 * @param print_items when false, only sk.to_string() is returned; when true,
 *                    that summary is followed by a "### EBPPS Sketch Items"
 *                    header and one "<index>: <str(item)>" line per item
 *                    yielded by iterating the sketch.
 * @return the formatted text.
 */
template<typename T>
std::string ebpps_sketch_to_string(const ebpps_sketch<T>& sk, bool print_items) {
  if (!print_items) {
    return sk.to_string();
  }

  std::ostringstream ss;
  ss << sk.to_string();
  // '\n' rather than std::endl: flushing an in-memory stream is pure overhead
  // and the resulting text is identical.
  ss << "### EBPPS Sketch Items" << '\n';
  int i = 0;
  // Bind by const reference so each item is not copied per iteration
  // (for py::object a copy costs a reference-count increment/decrement).
  for (const auto& item : sk) {
    // item is an arbitrary py::object, so get the value by
    // using internal str() method then casting to C++ std::string
    py::str item_pystr(item);
    std::string item_str = py::cast<std::string>(item_pystr);
    ss << i++ << ": " << item_str << '\n';
  }
  return ss.str();
}
}
}
namespace dspy = datasketches::python;
/**
 * Registers ebpps_sketch<T> as a Python class on the given module.
 *
 * Exposes the constructor, string conversion, update/merge, the read-only
 * properties k, n and c, sample retrieval, (de)serialization through a
 * py_object_serde, and Python iteration over the sketch.
 *
 * @param m    module that receives the class.
 * @param name Python-visible class name.
 */
template<typename T>
void bind_ebpps_sketch(py::module &m, const char* name) {
  using namespace datasketches;
  using sketch_t = ebpps_sketch<T>;

  py::class_<sketch_t>(m, name)
    .def(py::init<uint32_t>(), py::arg("k"))
    .def("__str__", &dspy::ebpps_sketch_to_string<T>, py::arg("print_items")=false,
         "Produces a string summary of the sketch")
    .def("to_string", &dspy::ebpps_sketch_to_string<T>, py::arg("print_items")=false,
         "Produces a string summary of the sketch")
    // static_cast selects the const-reference overloads of update()/merge().
    .def("update", static_cast<void (sketch_t::*)(const T&, double)>(&sketch_t::update),
         py::arg("item"), py::arg("weight")=1.0,
         "Updates the sketch with the given value and weight")
    .def("merge", static_cast<void (sketch_t::*)(const sketch_t&)>(&sketch_t::merge),
         py::arg("sketch"), "Merges the sketch with the given sketch")
    .def_property_readonly("k", &sketch_t::get_k,
         "Returns the sketch's maximum configured sample size")
    .def_property_readonly("n", &sketch_t::get_n,
         "Returns the total stream length")
    .def_property_readonly("c", &sketch_t::get_c,
         "Returns the expected number of samples returned upon a call to get_result() or the creation of an iterator. "
         "The number is a floating point value, where the fractional portion represents the probability of including "
         "a \"partial item\" from the sample. The value C should be no larger than the sketch's configured value of k, "
         "although numerical precision limitations mean it may exceed k by double precision floating point error margins in certain cases.")
    .def("get_samples", &sketch_t::get_result,
         "Returns the set of samples in the sketch")
    .def("is_empty", &sketch_t::is_empty,
         "Returns True if the sketch is empty, otherwise False")
    .def("get_serialized_size_bytes",
         [](const sketch_t& sk, py_object_serde& sd) { return sk.get_serialized_size_bytes(sd); },
         py::arg("serde"),
         "Computes the size in bytes needed to serialize the current sketch")
    // Docstrings previously said "var opt sketch" (copy-paste from another binding).
    .def("serialize", &dspy::ebpps_sketch_serialize<T>, py::arg("serde"),
         "Serialize the EBPPS sketch using the provided serde")
    .def_static("deserialize", &dspy::ebpps_sketch_deserialize<T>, py::arg("bytes"), py::arg("serde"),
         "Constructs an EBPPS sketch from the given bytes using the provided serde")
    // keep_alive<0, 1> ties the sketch's lifetime to the returned iterator so the
    // C++ iterators never dangle if Python drops its last reference to the sketch
    // while iteration is still in progress.
    .def("__iter__",
         [](const sketch_t& sk) { return py::make_iterator(sk.begin(), sk.end()); },
         py::keep_alive<0, 1>());
}
/**
 * Adds the EBPPS sketch binding to the module, instantiated for arbitrary
 * Python objects (py::object) as the item type.
 *
 * @param m module that receives the "ebpps_sketch" class.
 */
void init_ebpps(py::module &m) {
  constexpr const char* python_class_name = "ebpps_sketch";
  bind_ebpps_sketch<py::object>(m, python_class_name);
}