Merge pull request #36 from apache/theta_null
allow null inputs
diff --git a/DISCLAIMER-WIP b/DISCLAIMER-WIP
deleted file mode 100644
index ae9f942..0000000
--- a/DISCLAIMER-WIP
+++ /dev/null
@@ -1,26 +0,0 @@
-Apache DataSketches (incubating) is an effort undergoing incubation
-at The Apache Software Foundation (ASF), sponsored by the Apache Incubator.
-
-Incubation is required of all newly accepted projects until a further review
-indicates that the infrastructure, communications, and decision making process
-have stabilized in a manner consistent with other successful ASF projects.
-
-While incubation status is not necessarily a reflection of the
-completeness or stability of the code, it does indicate that the
-project has yet to be fully endorsed by the ASF.
-
-Some of the incubating project's releases may not be fully compliant
-with ASF policy. For example, releases may have incomplete or
-un-reviewed licensing conditions. What follows is a list of known
-issues the project is currently aware of (note that this list, by
-definition, is likely to be incomplete):
-
- * No issues are known at this time.
-
-If you are planning to incorporate this work into your
-product or project, please be aware that you will need to conduct a
-thorough licensing review to determine the overall implications of
-including this work. For the current status of this project through the Apache
-Incubator visit:
-
-http://incubator.apache.org/projects/datasketches.html
diff --git a/Dockerfile b/Dockerfile
index 76c6e97..daa3548 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -70,7 +70,7 @@
done; \
[ -n "$success" ]; \
} && \
- download_bin "datasketches-cpp.zip" "$DS_CPP_HASH" "incubator/datasketches/cpp/$DS_CPP_VER/apache-datasketches-cpp-$DS_CPP_VER-src.zip" && \
+ download_bin "datasketches-cpp.zip" "$DS_CPP_HASH" "datasketches/cpp/$DS_CPP_VER/apache-datasketches-cpp-$DS_CPP_VER-src.zip" && \
unzip datasketches-cpp.zip && \
mv apache-datasketches-cpp-$DS_CPP_VER-src datasketches-cpp && \
make && \
diff --git a/META.json b/META.json
index ce22b3b..749c802 100644
--- a/META.json
+++ b/META.json
@@ -3,7 +3,7 @@
"abstract": "approximate algorithms for big data analysis",
"version": "1.3.0",
"maintainer": [
- "Apache DataSketches (incubating) <issues@datasketches.apache.org>",
+ "Apache DataSketches <issues@datasketches.apache.org>",
"Sketches User List <sketches-user@googlegroups.com>"
],
"license": "postgresql",
@@ -36,11 +36,11 @@
},
"resources": {
"bugtracker": {
- "web": "https://github.com/apache/incubator-datasketches-postgresql/issues"
+ "web": "https://github.com/apache/datasketches-postgresql/issues"
},
"repository": {
- "url": "https://github.com/apache/incubator-datasketches-postgresql.git",
- "web": "https://github.com/apache/incubator-datasketches-postgresql",
+ "url": "https://github.com/apache/datasketches-postgresql.git",
+ "web": "https://github.com/apache/datasketches-postgresql",
"type": "git"
}
},
diff --git a/NOTICE b/NOTICE
index e51b4ae..8098108 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,4 +1,4 @@
-Apache DataSketches-postgresql (incubating)
+Apache DataSketches-postgresql
Copyright 2020 The Apache Software Foundation
Copyright 2015-2018 Yahoo
diff --git a/README.md b/README.md
index 3b2fcd9..d605541 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@
- KLL float quantiles sketch - for estimating distributions: quantile, rank, PMF (histogram), CDF
- Frequent strings sketch - capture the heaviest items (strings) by count or by some other weight
-## How to build and install
+## How to build and install [![Build Status](https://travis-ci.org/apache/datasketches-postgresql.svg?branch=master)](https://travis-ci.org/apache/datasketches-postgresql)
This code is intended to be distributed as a PostgreSQL extension on [PGXN site](https://pgxn.org/)
@@ -42,9 +42,9 @@
This code requires C++11. It was tested with GCC 4.8.5 (standard in RedHat at the time of this writing), GCC 8.2.0, GCC 9.2.0, Apple LLVM version 10.0.1 (clang-1001.0.46.4) and version 11.0.0 (clang-1100.0.33.8).
-This code depends on [datasketches-cpp version 1.0.0-incubating](https://github.com/apache/incubator-datasketches-cpp/releases/tag/1.0.0-incubating)
+This code depends on [datasketches-cpp version 2.1.0-incubating](https://github.com/apache/datasketches-cpp/releases/tag/2.1.0-incubating)
-There are two slightly different ways to build this extension: from a PGXN distribution or from two separate packages: datasketches-postgresql and datasketches-cpp (either from GitHub or from [Apache archive](http://archive.apache.org/dist/incubator/datasketches/))
+There are two slightly different ways to build this extension: from a PGXN distribution or from two separate packages: datasketches-postgresql and datasketches-cpp (either from GitHub or from [Apache archive](http://archive.apache.org/dist/datasketches/))
### PGXN extension
@@ -64,8 +64,8 @@
- make
- sudo make install
-On MacOSX Mojave, if you see a warning like this:<br>
-clang: warning: no such sysroot directory: ‘/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk’ [-Wmissing-sysroot]<br>
+On MacOSX Mojave, if you see a warning like this:
+`clang: warning: no such sysroot directory: ‘/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk’ [-Wmissing-sysroot]`
and the compilation fails because of not being able to find system include files, this is a known OSX problem. There are known solutions on the Internet.
### Verifying installation with a test database
@@ -407,7 +407,3 @@
(10,3649596,3289743,3649596)
(11,3294912,2935059,3294912)
(11 rows)
-
-----
-
-Disclaimer: Apache DataSketches is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
diff --git a/package.sh b/package.sh
index d57b44a..8b4d1a3 100755
--- a/package.sh
+++ b/package.sh
@@ -32,7 +32,7 @@
fi
# version of datasketches-cpp core library to include
-CORETAG=1.0.0-incubating
+CORETAG=2.1.0-incubating
DST=datasketches-$VER
diff --git a/src/aod_sketch_pg_functions.c b/src/aod_sketch_pg_functions.c
index 98766ef..a1f0673 100644
--- a/src/aod_sketch_pg_functions.c
+++ b/src/aod_sketch_pg_functions.c
@@ -118,8 +118,8 @@
}
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(3);
- p = PG_GETARG_FLOAT4(4);
+ lg_k = PG_NARGS() > 3 ? PG_GETARG_INT32(3) : 0;
+ p = PG_NARGS() > 4 ? PG_GETARG_FLOAT4(4) : 1;
if (lg_k) {
sketchptr = p ? aod_sketch_new_lgk_p(arr_len, lg_k, p) : aod_sketch_new_lgk(arr_len, lg_k);
} else {
@@ -129,7 +129,7 @@
sketchptr = PG_GETARG_POINTER(0);
}
- element_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ element_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
element = PG_GETARG_DATUM(1);
get_typlenbyvalalign(element_type, &typlen, &typbyval, &typalign);
if (typlen == -1) {
@@ -222,8 +222,7 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- num_values = PG_GETARG_INT32(2);
- if (num_values == 0) num_values = 1;
+ num_values = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : 1;
interptr = aod_intersection_new(num_values);
} else {
interptr = PG_GETARG_POINTER(0);
@@ -261,9 +260,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- num_values = PG_GETARG_INT32(2);
- if (num_values == 0) num_values = 1;
- lg_k = PG_GETARG_INT32(3);
+ num_values = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : 1;
+ lg_k = PG_NARGS() > 3 ? PG_GETARG_INT32(3) : 0;
unionptr = lg_k ? aod_union_new_lgk(num_values, lg_k) : aod_union_new(num_values);
} else {
unionptr = PG_GETARG_POINTER(0);
@@ -485,8 +483,7 @@
bytes_in = PG_GETARG_BYTEA_P(0);
aodptr = aod_sketch_deserialize(VARDATA(bytes_in), VARSIZE(bytes_in) - VARHDRSZ);
column_index = PG_GETARG_INT32(1);
- k = PG_GETARG_INT32(2);
- if (k == 0) k = DEFAULT_K;
+ k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : DEFAULT_K;
kllptr = aod_sketch_to_kll_float_sketch(aodptr, column_index, k);
bytes_out = kll_float_sketch_serialize(kllptr, VARHDRSZ);
kll_float_sketch_delete(kllptr);
diff --git a/src/cpc_sketch_pg_functions.c b/src/cpc_sketch_pg_functions.c
index 2e5d3d4..00a521e 100644
--- a/src/cpc_sketch_pg_functions.c
+++ b/src/cpc_sketch_pg_functions.c
@@ -79,8 +79,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(2);
- sketchptr = cpc_sketch_new(lg_k ? lg_k : CPC_DEFAULT_LG_K);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : CPC_DEFAULT_LG_K;
+ sketchptr = cpc_sketch_new(lg_k);
} else {
sketchptr = PG_GETARG_POINTER(0);
}
@@ -172,8 +172,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(2);
- unionptr = cpc_union_new(lg_k ? lg_k : CPC_DEFAULT_LG_K);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : CPC_DEFAULT_LG_K;
+ unionptr = cpc_union_new(lg_k);
} else {
unionptr = PG_GETARG_POINTER(0);
}
diff --git a/src/frequent_strings_sketch_pg_functions.c b/src/frequent_strings_sketch_pg_functions.c
index dced821..5c63c15 100644
--- a/src/frequent_strings_sketch_pg_functions.c
+++ b/src/frequent_strings_sketch_pg_functions.c
@@ -79,11 +79,7 @@
str = PG_GETARG_VARCHAR_P(2);
// optional weight
- if (PG_NARGS() == 3) {
- weight = 1;
- } else {
- weight = PG_GETARG_INT64(3);
- }
+ weight = PG_NARGS() > 3 ? PG_GETARG_INT64(3) : 1;
frequent_strings_sketch_update(sketchptr, VARDATA(str), VARSIZE(str) - VARHDRSZ, weight);
@@ -150,11 +146,7 @@
bool print_items;
char* str;
bytes_in = PG_GETARG_BYTEA_P(0);
- if (PG_NARGS() > 1) {
- print_items = PG_GETARG_BOOL(1);
- } else {
- print_items = false;
- }
+ print_items = PG_NARGS() > 1 ? PG_GETARG_BOOL(1) : false;
sketchptr = frequent_strings_sketch_deserialize(VARDATA(bytes_in), VARSIZE(bytes_in) - VARHDRSZ);
str = frequent_strings_sketch_to_string(sketchptr, print_items);
frequent_strings_sketch_delete(sketchptr);
@@ -182,11 +174,7 @@
if (PG_ARGISNULL(0)) SRF_RETURN_DONE(funcctx);
bytes_in = PG_GETARG_BYTEA_P(0);
sketchptr = frequent_strings_sketch_deserialize(VARDATA(bytes_in), VARSIZE(bytes_in) - VARHDRSZ);
- if (PG_ARGISNULL(1)) {
- threshold = 0;
- } else {
- threshold = PG_GETARG_INT64(1);
- }
+ threshold = PG_NARGS() > 1 ? PG_GETARG_INT64(1) : 0;
funcctx->user_fctx = frequent_strings_sketch_get_frequent_items(sketchptr, no_false_positives, threshold);
funcctx->max_calls = ((struct frequent_strings_sketch_result*) funcctx->user_fctx)->num;
diff --git a/src/hll_sketch_pg_functions.c b/src/hll_sketch_pg_functions.c
index 2454e66..4de4b68 100644
--- a/src/hll_sketch_pg_functions.c
+++ b/src/hll_sketch_pg_functions.c
@@ -80,9 +80,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(2);
- if (lg_k == 0) lg_k = HLL_DEFAULT_LG_K;
- tgt_type = PG_GETARG_INT32(3);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : HLL_DEFAULT_LG_K;
+ tgt_type = PG_NARGS() > 3 ? PG_GETARG_INT32(3) : 0;
if (tgt_type) {
if ((tgt_type != 4) && (tgt_type != 6) && (tgt_type != 8)) {
elog(ERROR, "hll_sketch_add_item: unsupported target type, must be 4, 6 or 8");
@@ -187,10 +186,10 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(2);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : HLL_DEFAULT_LG_K;
stateptr = palloc(sizeof(struct hll_union_state));
- stateptr->unionptr = hll_union_new(lg_k ? lg_k : HLL_DEFAULT_LG_K);
- stateptr->tgt_type = PG_GETARG_INT32(3);
+ stateptr->unionptr = hll_union_new(lg_k);
+ stateptr->tgt_type = PG_NARGS() > 3 ? PG_GETARG_INT32(3) : 0;
if (stateptr->tgt_type) {
if ((stateptr->tgt_type != 4) && (stateptr->tgt_type != 6) && (stateptr->tgt_type != 8)) {
elog(ERROR, "hll_sketch_union_agg: unsupported target type, must be 4, 6 or 8");
@@ -300,14 +299,14 @@
unsigned lg_k;
unsigned tgt_type;
- lg_k = PG_GETARG_INT32(2);
- tgt_type = PG_GETARG_INT32(3);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : HLL_DEFAULT_LG_K;
+ tgt_type = PG_NARGS() > 3 ? PG_GETARG_INT32(3) : 0;
if (tgt_type) {
if ((tgt_type != 4) && (tgt_type != 6) && (tgt_type != 8)) {
elog(ERROR, "hll_sketch_union: unsupported target type, must be 4, 6 or 8");
}
}
- unionptr = hll_union_new(lg_k ? lg_k : HLL_DEFAULT_LG_K);
+ unionptr = hll_union_new(lg_k);
if (!PG_ARGISNULL(0)) {
bytes_in1 = PG_GETARG_BYTEA_P(0);
sketchptr1 = hll_sketch_deserialize(VARDATA(bytes_in1), VARSIZE(bytes_in1) - VARHDRSZ);
diff --git a/src/kll_float_sketch_pg_functions.c b/src/kll_float_sketch_pg_functions.c
index 87426c8..3e31159 100644
--- a/src/kll_float_sketch_pg_functions.c
+++ b/src/kll_float_sketch_pg_functions.c
@@ -77,8 +77,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- k = PG_GETARG_INT32(2);
- sketchptr = kll_float_sketch_new(k ? k : DEFAULT_K);
+ k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : DEFAULT_K;
+ sketchptr = kll_float_sketch_new(k);
} else {
sketchptr = PG_GETARG_POINTER(0);
}
@@ -160,8 +160,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- k = PG_GETARG_INT32(2);
- unionptr = kll_float_sketch_new(k ? k : DEFAULT_K);
+ k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : DEFAULT_K;
+ unionptr = kll_float_sketch_new(k);
} else {
unionptr = PG_GETARG_POINTER(0);
}
diff --git a/src/theta_sketch_pg_functions.c b/src/theta_sketch_pg_functions.c
index 1e9f077..04f64bf 100644
--- a/src/theta_sketch_pg_functions.c
+++ b/src/theta_sketch_pg_functions.c
@@ -86,8 +86,8 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(2);
- p = PG_GETARG_FLOAT4(3);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : 0;
+ p = PG_NARGS() > 3 ? PG_GETARG_FLOAT4(3) : 1;
if (lg_k) {
sketchptr = p ? theta_sketch_new_lgk_p(lg_k, p) : theta_sketch_new_lgk(lg_k);
} else {
@@ -219,7 +219,7 @@
oldcontext = MemoryContextSwitchTo(aggcontext);
if (PG_ARGISNULL(0)) {
- lg_k = PG_GETARG_INT32(2);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : 0;
unionptr = lg_k ? theta_union_new(lg_k) : theta_union_new_default();
} else {
unionptr = PG_GETARG_POINTER(0);
@@ -347,7 +347,7 @@
struct ptr_with_size bytes_out;
int lg_k;
- lg_k = PG_GETARG_INT32(2);
+ lg_k = PG_NARGS() > 2 ? PG_GETARG_INT32(2) : 0;
unionptr = lg_k ? theta_union_new(lg_k) : theta_union_new_default();
if (!PG_ARGISNULL(0)) {
bytes_in1 = PG_GETARG_BYTEA_P(0);