Merge pull request #30 from apache/aod_sketch
array of doubles sketch
diff --git a/.asf.yaml b/.asf.yaml
new file mode 100644
index 0000000..ea5ead8
--- /dev/null
+++ b/.asf.yaml
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+github:
+ homepage: https://datasketches.apache.org
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..840fb0d
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,12 @@
+services:
+ - docker
+
+install:
+ - docker build --pull -t datasketch-postgres:latest .
+ - docker run --name some-postgres -e POSTGRES_PASSWORD=mysecretpassword -d datasketch-postgres:latest
+
+script:
+ - for i in $(seq 1 30); do docker exec some-postgres pg_isready -q -h 127.0.0.1 && break; sleep 1; done # bounded wait for the server to accept TCP connections instead of a fixed sleep
+ - docker logs some-postgres
+ - docker ps | grep -q some-postgres
+ - docker exec some-postgres psql -U postgres -c 'select theta_sketch_get_estimate(theta_sketch_build(1))'
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..76c6e97
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG BASE_IMAGE_VERSION=latest
+
+ARG DATASKETCHES_CPP_HASH=8135b65408947694e13bd131038889e439847aa2
+ARG DATASKETCHES_CPP_VERSION=2.0.0-incubating
+
+FROM postgres:$BASE_IMAGE_VERSION
+
+LABEL maintainer="dev@datasketches.apache.org"
+
+ENV APACHE_DIST_URLS \
+ https://www.apache.org/dyn/closer.cgi?action=download&filename= \
+ https://www-us.apache.org/dist/ \
+ https://www.apache.org/dist/ \
+ https://archive.apache.org/dist/
+# Re-declare the pre-FROM build args (without values) so their defaults are visible inside this build stage
+ARG DATASKETCHES_CPP_VERSION
+ARG DATASKETCHES_CPP_HASH
+
+ENV DS_CPP_VER=$DATASKETCHES_CPP_VERSION
+ENV DS_CPP_HASH=$DATASKETCHES_CPP_HASH
+
+# Copy the extension sources into the image; the RUN step below builds them and then deletes this directory
+COPY . /datasketches-postgresql
+WORKDIR /datasketches-postgresql
+
+RUN echo "===> Adding prerequisites..." && \
+ export PG_MAJOR=`apt list --installed 2>&1 | sed -n "s/^postgresql-\([0-9.]*\)\/.*/\1/p"` && \
+ export PG_MINOR=`apt list --installed 2>&1 | sed -n "s/^postgresql-$PG_MAJOR\/\S*\s\(\S*\)\s.*/\1/p"` && \
+ apt-get update -y && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get install --no-install-recommends --allow-downgrades -y -q \
+ ca-certificates \
+ build-essential wget unzip \
+ postgresql-server-dev-$PG_MAJOR=$PG_MINOR \
+ libpq-dev=$PG_MINOR && \
+ \
+ \
+ echo "===> Building datasketches..." && \
+ set -eux; \
+ download_bin() { \
+ local f="$1"; shift; \
+ local hash="$1"; shift; \
+ local distFile="$1"; shift; \
+ local success=; \
+ local distUrl=; \
+ for distUrl in $APACHE_DIST_URLS; do \
+ # Checksum must be part of the if-condition: set -e is ignored inside a function called from an && list \
+ if wget -nv -O "$f" "$distUrl$distFile" && \
+ echo "$hash" "*$f" | sha1sum -c -; then \
+ success=1; \
+ break; \
+ fi; \
+ done; \
+ [ -n "$success" ]; \
+ } && \
+ download_bin "datasketches-cpp.zip" "$DS_CPP_HASH" "incubator/datasketches/cpp/$DS_CPP_VER/apache-datasketches-cpp-$DS_CPP_VER-src.zip" && \
+ unzip datasketches-cpp.zip && \
+ mv apache-datasketches-cpp-$DS_CPP_VER-src datasketches-cpp && \
+ make && \
+ make install && \
+ \
+ \
+ echo "===> Clean up..." && \
+ apt-mark hold postgresql-$PG_MAJOR postgresql-client-$PG_MAJOR && \
+ apt-get -y remove --purge --auto-remove \
+ ca-certificates \
+ build-essential wget unzip \
+ postgresql-server-dev-$PG_MAJOR libpq-dev && \
+ apt-get clean && \
+ rm -rf /datasketches-postgresql /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+COPY docker-entrypoint-initdb.d /docker-entrypoint-initdb.d
+
+WORKDIR /
+
+ENTRYPOINT ["/docker-entrypoint.sh"]
+EXPOSE 5432
+CMD ["postgres"]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 0faccce..41bf70c 100644
--- a/Makefile
+++ b/Makefile
@@ -52,6 +52,15 @@
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
+# fix LLVM JIT compilation error: when with_llvm is yes, compile .cpp to bitcode with -Wno-ignored-attributes, then add a module summary via opt
+ifeq ($(with_llvm), yes)
+ COMPILE.cxx.bc = $(CLANG) -xc++ -Wno-ignored-attributes $(BITCODE_CXXFLAGS) $(CPPFLAGS) -emit-llvm -c
+endif
+
+%.bc : %.cpp
+ $(COMPILE.cxx.bc) -o $@ $<
+ if [ "$(with_llvm)" = "yes" ]; then $(LLVM_BINPATH)/opt -module-summary -f $@ -o $@; fi
+
# generate combined sql
$(SQL_INSTALL): $(SQL_MODULES)
cat $^ > $@
diff --git a/README.md b/README.md
index e3821b5..3b2fcd9 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,38 @@
(1 row)
+### Docker
+
+Build Docker image:
+
+ $ docker build . -t datasketch-postgres:latest
+
+Build Docker image with a specific PostgreSQL base image version:
+
+ $ docker build --pull --build-arg BASE_IMAGE_VERSION=10 -t datasketch-postgres:10 .
+
+Run container:
+
+ $ docker run --name some-postgres -e POSTGRES_PASSWORD=mysecretpassword -d datasketch-postgres:latest
+
+Test DataSketches in PostgreSQL:
+
+ $ docker exec -it some-postgres psql -U postgres
+ postgres=# SELECT cpc_sketch_get_estimate(cpc_sketch_union(respondents_sketch)) AS num_respondents, flavor
+ FROM (
+ SELECT
+ cpc_sketch_build(respondent) AS respondents_sketch,
+ flavor,
+ country
+ FROM (
+ SELECT * FROM (
+ VALUES (1, 'Vanilla', 'CH'),
+ (1, 'Chocolate', 'CH'),
+ (2, 'Chocolate', 'US'),
+ (2, 'Strawberry', 'US')) AS t(respondent, flavor, country)) as foo
+ GROUP BY flavor, country) as bar
+ GROUP BY flavor;
+
## Examples
### Distinct counting with CPC sketch
diff --git a/docker-entrypoint-initdb.d/01_init_datasketches.sql b/docker-entrypoint-initdb.d/01_init_datasketches.sql
new file mode 100644
index 0000000..e25d562
--- /dev/null
+++ b/docker-entrypoint-initdb.d/01_init_datasketches.sql
@@ -0,0 +1 @@
+CREATE EXTENSION IF NOT EXISTS datasketches;
\ No newline at end of file