Merge branch 'pybind_build_dependency' of github.com:apache/datasketches-cpp into pybind_build_dependency
diff --git a/.gitmodules b/.gitmodules
index 4dbf077..e69de29 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "python/pybind11"]
- path = python/pybind11
- url = https://github.com/pybind/pybind11
diff --git a/common/include/common_defs.hpp b/common/include/common_defs.hpp
index 778f851..dadcaac 100644
--- a/common/include/common_defs.hpp
+++ b/common/include/common_defs.hpp
@@ -29,6 +29,8 @@
static const uint64_t DEFAULT_SEED = 9001;
+enum resize_factor { X1 = 0, X2, X4, X8 };
+
template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index cde110f..feb1e15 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -26,9 +26,16 @@
namespace datasketches {
-static const uint8_t CPC_MIN_LG_K = 4;
-static const uint8_t CPC_MAX_LG_K = 26;
-static const uint8_t CPC_DEFAULT_LG_K = 11;
+namespace cpc_constants {
+ const uint8_t MIN_LG_K = 4;
+ const uint8_t MAX_LG_K = 26;
+ const uint8_t DEFAULT_LG_K = 11;
+}
+
+// TODO: Redundant and deprecated. Will be removed in next major version release.
+static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
+static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
+static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index cdc4d16..c000ac9 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -67,7 +67,7 @@
* @param lg_k base 2 logarithm of the number of bins in the sketch
* @param seed for hash function
*/
- explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+ explicit cpc_sketch_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
using allocator_type = A;
A get_allocator() const;
diff --git a/cpc/include/cpc_union.hpp b/cpc/include/cpc_union.hpp
index dd59abc..54fbed5 100644
--- a/cpc/include/cpc_union.hpp
+++ b/cpc/include/cpc_union.hpp
@@ -45,7 +45,7 @@
* @param lg_k base 2 logarithm of the number of bins in the sketch
* @param seed for hash function
*/
- explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+ explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
cpc_union_alloc(const cpc_union_alloc<A>& other);
cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index 28ea8ea..fa55520 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -153,6 +153,10 @@
template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
template<typename A> using vector_d = std::vector<double, AllocD<A>>;
+namespace kll_constants {
+ const uint16_t DEFAULT_K = 200;
+}
+
template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
class kll_sketch {
public:
@@ -160,11 +164,12 @@
using comparator = C;
static const uint8_t DEFAULT_M = 8;
- static const uint16_t DEFAULT_K = 200;
+ // TODO: Redundant and deprecated. Will be remove din next major version.
+ static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
static const uint16_t MIN_K = DEFAULT_M;
static const uint16_t MAX_K = (1 << 16) - 1;
- explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
+ explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
kll_sketch(const kll_sketch& other);
kll_sketch(kll_sketch&& other) noexcept;
~kll_sketch();
diff --git a/pyproject.toml b/pyproject.toml
index 077b885..a989be3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,8 @@
requires = ["wheel",
"setuptools >= 30.3.0",
"setuptools_scm",
- "cmake >= 3.12"]
+ "cmake >= 3.12",
+ "pybind11[global] >= 2.6.0"]
[tool.tox]
legacy_tox_ini = """
@@ -12,6 +13,7 @@
[testenv]
deps = pytest
numpy
+ pybind11[global] >= 2.6.0
changedir = python/tests
commands = pytest
"""
\ No newline at end of file
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 0b76799..4da7b9d 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -22,9 +22,12 @@
set(PYBIND11_CPP_STANDARD -std=c++11)
endif()
-add_subdirectory(pybind11)
+find_package(Python3 COMPONENTS Interpreter Development)
+find_package(pybind11 CONFIG)
+#add_subdirectory(pybind11)
-pybind11_add_module(python MODULE EXCLUDE_FROM_ALL SYSTEM THIN_LTO)
+#pybind11_add_module(python MODULE EXCLUDE_FROM_ALL SYSTEM THIN_LTO)
+pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
target_link_libraries(python
PRIVATE
diff --git a/python/pybind11 b/python/pybind11
deleted file mode 160000
index 59a2ac2..0000000
--- a/python/pybind11
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 59a2ac2745d8a57ac94c6accced73620d59fb844
diff --git a/python/src/cpc_wrapper.cpp b/python/src/cpc_wrapper.cpp
index 673e3c7..deae7f6 100644
--- a/python/src/cpc_wrapper.cpp
+++ b/python/src/cpc_wrapper.cpp
@@ -53,7 +53,7 @@
using namespace datasketches;
py::class_<cpc_sketch>(m, "cpc_sketch")
- .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=CPC_DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
+ .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=cpc_constants::DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
.def(py::init<const cpc_sketch&>())
.def("__str__", &cpc_sketch::to_string,
"Produces a string summary of the sketch")
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index d356358..48004b9 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -116,7 +116,7 @@
using namespace datasketches;
py::class_<kll_sketch<T>>(m, name)
- .def(py::init<uint16_t>(), py::arg("k")=kll_sketch<T>::DEFAULT_K)
+ .def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
.def(py::init<const kll_sketch<T>&>())
.def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
"Updates the sketch with the given value")
diff --git a/python/src/theta_wrapper.cpp b/python/src/theta_wrapper.cpp
index cf8fdc7..86a2406 100644
--- a/python/src/theta_wrapper.cpp
+++ b/python/src/theta_wrapper.cpp
@@ -103,7 +103,7 @@
py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
.def(py::init(&dspy::update_theta_sketch_factory),
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
.def(py::init<const update_theta_sketch&>())
.def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
"Updates the sketch with the given integral value")
@@ -127,7 +127,7 @@
py::class_<theta_union>(m, "theta_union")
.def(py::init(&dspy::theta_union_factory),
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
.def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
"Updates the union with the given sketch")
.def("get_result", &theta_union::get_result, py::arg("ordered")=true,
diff --git a/python/src/vector_of_kll.cpp b/python/src/vector_of_kll.cpp
index b8d8a91..5a2ef1d 100644
--- a/python/src/vector_of_kll.cpp
+++ b/python/src/vector_of_kll.cpp
@@ -29,14 +29,20 @@
namespace datasketches {
+namespace vector_of_kll_constants {
+ static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
+ static const uint32_t DEFAULT_D = 1;
+}
+
// Wrapper class for Numpy compatibility
template <typename T, typename C = std::less<T>, typename S = serde<T>>
class vector_of_kll_sketches {
public:
- static const uint32_t DEFAULT_K = kll_sketch<T, C, S>::DEFAULT_K;
- static const uint32_t DEFAULT_D = 1;
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
+ static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
+ static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
- explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
+ explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
vector_of_kll_sketches(const vector_of_kll_sketches& other);
vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
@@ -432,8 +438,8 @@
using namespace datasketches;
py::class_<vector_of_kll_sketches<T>>(m, name)
- .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_sketches<T>::DEFAULT_K,
- py::arg("d")=vector_of_kll_sketches<T>::DEFAULT_D)
+ .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K,
+ py::arg("d")=vector_of_kll_constants::DEFAULT_D)
.def(py::init<const vector_of_kll_sketches<T>&>())
// allow user to retrieve k or d, in case it's instantiated w/ defaults
.def("get_k", &vector_of_kll_sketches<T>::get_k,
diff --git a/sampling/include/var_opt_sketch.hpp b/sampling/include/var_opt_sketch.hpp
index 3f5110c..a949899 100644
--- a/sampling/include/var_opt_sketch.hpp
+++ b/sampling/include/var_opt_sketch.hpp
@@ -51,18 +51,23 @@
double total_sketch_weight;
};
-enum resize_factor { X1 = 0, X2, X4, X8 };
-
template <typename T, typename S, typename A> class var_opt_union; // forward declaration
+namespace var_opt_constants {
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+ const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
+}
+
template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
class var_opt_sketch {
public:
- static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
- static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
+ static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
+ static const uint32_t MAX_K = var_opt_constants::MAX_K;
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
+ explicit var_opt_sketch(uint32_t k,
+ resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
+ const A& allocator = A());
var_opt_sketch(const var_opt_sketch& other);
var_opt_sketch(var_opt_sketch&& other) noexcept;
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index dbfe5a4..ce0ac40 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -128,7 +128,7 @@
r_(r_count),
n_(n),
total_wt_r_(total_wt_r),
- rf_(DEFAULT_RESIZE_FACTOR),
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
curr_items_alloc_(len),
filled_data_(n > k),
allocator_(allocator),
diff --git a/setup.py b/setup.py
index 9d270d9..a84954e 100644
--- a/setup.py
+++ b/setup.py
@@ -49,8 +49,9 @@
os.path.dirname(self.get_ext_fullpath(ext.name)))
cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
cmake_args += ['-DWITH_PYTHON=True']
+ cmake_args += ['-DCMAKE_CXX_STANDARD=11']
# ensure we use a consistent python version
- cmake_args += ['-DPYTHON_EXECUTABLE=' + sys.executable]
+ cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
cfg = 'Debug' if self.debug else 'Release'
build_args = ['--config', cfg]
diff --git a/theta/include/theta_constants.hpp b/theta/include/theta_constants.hpp
index d5d6fd9..e47457b 100644
--- a/theta/include/theta_constants.hpp
+++ b/theta/include/theta_constants.hpp
@@ -21,14 +21,19 @@
#define THETA_CONSTANTS_HPP_
#include <climits>
+#include "common_defs.hpp"
namespace datasketches {
namespace theta_constants {
- enum resize_factor { X1, X2, X4, X8 };
- static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
- static const uint8_t MIN_LG_K = 5;
- static const uint8_t MAX_LG_K = 26;
+ using resize_factor = datasketches::resize_factor;
+ //enum resize_factor { X1, X2, X4, X8 };
+ const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
+ const uint8_t MIN_LG_K = 5;
+ const uint8_t MAX_LG_K = 26;
+
+ const uint8_t DEFAULT_LG_K = 12;
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
}
} /* namespace datasketches */
diff --git a/theta/include/theta_update_sketch_base.hpp b/theta/include/theta_update_sketch_base.hpp
index 4be1040..9d51c62 100644
--- a/theta/include/theta_update_sketch_base.hpp
+++ b/theta/include/theta_update_sketch_base.hpp
@@ -94,11 +94,14 @@
template<typename Derived, typename Allocator>
class theta_base_builder {
public:
+ // TODO: Redundant and deprecated. Will be removed in next major verison release.
using resize_factor = theta_constants::resize_factor;
static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
- static const uint8_t DEFAULT_LG_K = 12;
- static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+ // TODO: The following defaults are redundant and deprecated. Will be removed in the
+ // next major version release
+ static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
+ static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
/**
* Creates and instance of the builder with default parameters.
diff --git a/theta/include/theta_update_sketch_base_impl.hpp b/theta/include/theta_update_sketch_base_impl.hpp
index 947f085..b4a3124 100644
--- a/theta/include/theta_update_sketch_base_impl.hpp
+++ b/theta/include/theta_update_sketch_base_impl.hpp
@@ -271,7 +271,11 @@
template<typename Derived, typename Allocator>
theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
-allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
+allocator_(allocator),
+lg_k_(theta_constants::DEFAULT_LG_K),
+rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
+p_(1),
+seed_(DEFAULT_SEED) {}
template<typename Derived, typename Allocator>
Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {