Merge pull request #19 from apache/histogram

added histogram function
diff --git a/sql/datasketches_kll_float_sketch.sql b/sql/datasketches_kll_float_sketch.sql
index b179a75..332496f 100644
--- a/sql/datasketches_kll_float_sketch.sql
+++ b/sql/datasketches_kll_float_sketch.sql
@@ -105,3 +105,11 @@
 CREATE OR REPLACE FUNCTION kll_float_sketch_get_quantiles(kll_float_sketch, double precision[]) RETURNS real[]
     AS '$libdir/datasketches', 'pg_kll_float_sketch_get_quantiles'
     LANGUAGE C STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION kll_float_sketch_get_histogram(kll_float_sketch) RETURNS double precision[]
+    AS '$libdir/datasketches', 'pg_kll_float_sketch_get_histogram'
+    LANGUAGE C STRICT IMMUTABLE; -- one-argument form: only the sketch is passed to the C entry point, no bin count
+
+CREATE OR REPLACE FUNCTION kll_float_sketch_get_histogram(kll_float_sketch, int) RETURNS double precision[]
+    AS '$libdir/datasketches', 'pg_kll_float_sketch_get_histogram'
+    LANGUAGE C STRICT IMMUTABLE; -- two-argument form: the int is read by the C entry point as the number of bins
diff --git a/src/kll_float_sketch_c_adapter.cpp b/src/kll_float_sketch_c_adapter.cpp
index 721c359..1eac1b2 100644
--- a/src/kll_float_sketch_c_adapter.cpp
+++ b/src/kll_float_sketch_c_adapter.cpp
@@ -131,14 +131,19 @@
   pg_unreachable();
 }
 
-Datum* kll_float_sketch_get_pmf_or_cdf(const void* sketchptr, const float* split_points, unsigned num_split_points, bool is_cdf) {
+Datum* kll_float_sketch_get_pmf_or_cdf(const void* sketchptr, const float* split_points, unsigned num_split_points, bool is_cdf, bool scale) {
   try {
     auto array = is_cdf ?
       static_cast<const kll_float_sketch*>(sketchptr)->get_CDF(split_points, num_split_points) :
       static_cast<const kll_float_sketch*>(sketchptr)->get_PMF(split_points, num_split_points);
     Datum* pmf = (Datum*) palloc(sizeof(Datum) * (num_split_points + 1));
+    const uint64_t n = static_cast<const kll_float_sketch*>(sketchptr)->get_n();
     for (unsigned i = 0; i < num_split_points + 1; i++) {
-      pmf[i] = pg_float8_get_datum(array[i]);
+      if (scale) {
+        pmf[i] = pg_float8_get_datum(array[i] * n);
+      } else {
+        pmf[i] = pg_float8_get_datum(array[i]);
+      }
     }
     return pmf;
   } catch (std::exception& e) {
diff --git a/src/kll_float_sketch_c_adapter.h b/src/kll_float_sketch_c_adapter.h
index 75dd62b..ff9723c 100644
--- a/src/kll_float_sketch_c_adapter.h
+++ b/src/kll_float_sketch_c_adapter.h
@@ -43,7 +43,7 @@
 void* kll_float_sketch_deserialize(const char* buffer, unsigned length);
 unsigned kll_float_sketch_get_serialized_size_bytes(const void* sketchptr);
 
-void** kll_float_sketch_get_pmf_or_cdf(const void* sketchptr, const float* split_points, unsigned num_split_points, bool is_cdf);
+void** kll_float_sketch_get_pmf_or_cdf(const void* sketchptr, const float* split_points, unsigned num_split_points, bool is_cdf, bool scale);
 void** kll_float_sketch_get_quantiles(const void* sketchptr, const double* fractions, unsigned num_fractions);
 
 #ifdef __cplusplus
diff --git a/src/kll_float_sketch_pg_functions.c b/src/kll_float_sketch_pg_functions.c
index 14cc3c1..08eb22c 100644
--- a/src/kll_float_sketch_pg_functions.c
+++ b/src/kll_float_sketch_pg_functions.c
@@ -38,6 +38,7 @@
 PG_FUNCTION_INFO_V1(pg_kll_float_sketch_get_pmf);
 PG_FUNCTION_INFO_V1(pg_kll_float_sketch_get_cdf);
 PG_FUNCTION_INFO_V1(pg_kll_float_sketch_get_quantiles);
+PG_FUNCTION_INFO_V1(pg_kll_float_sketch_get_histogram);
 
 /* function declarations */
 Datum pg_kll_float_sketch_recv(PG_FUNCTION_ARGS);
@@ -52,8 +53,10 @@
 Datum pg_kll_float_sketch_get_pmf(PG_FUNCTION_ARGS);
 Datum pg_kll_float_sketch_get_cdf(PG_FUNCTION_ARGS);
 Datum pg_kll_float_sketch_get_quantiles(PG_FUNCTION_ARGS);
+Datum pg_kll_float_sketch_get_histogram(PG_FUNCTION_ARGS);
 
 static const unsigned DEFAULT_K = 200;
+static const unsigned DEFAULT_NUM_BINS = 10;
 
 Datum pg_kll_float_sketch_add_item(PG_FUNCTION_ARGS) {
   void* sketchptr;
@@ -227,7 +230,7 @@
   for (i = 0; i < arr_len_in; i++) {
     split_points[i] = DatumGetFloat4(data_in[i]);
   }
-  result = (Datum*) kll_float_sketch_get_pmf_or_cdf(sketchptr, split_points, arr_len_in, false);
+  result = (Datum*) kll_float_sketch_get_pmf_or_cdf(sketchptr, split_points, arr_len_in, false, false);
   pfree(split_points);
 
   // construct output array of fractions
@@ -277,7 +280,7 @@
   for (i = 0; i < arr_len_in; i++) {
     split_points[i] = DatumGetFloat4(data_in[i]);
   }
-  result = (Datum*) kll_float_sketch_get_pmf_or_cdf(sketchptr, split_points, arr_len_in, true);
+  result = (Datum*) kll_float_sketch_get_pmf_or_cdf(sketchptr, split_points, arr_len_in, true, false);
   pfree(split_points);
 
   // construct output array of fractions
@@ -337,3 +340,47 @@
 
   PG_RETURN_ARRAYTYPE_P(arr_out);
 }
+
+// Returns a histogram of the sketch as float8[]: arg 0 is the serialized sketch, optional arg 1 the number of bins
+// (absent or 0 means DEFAULT_NUM_BINS). Bins are equal-width between the sketch's 0 and 1 quantiles.
+Datum pg_kll_float_sketch_get_histogram(PG_FUNCTION_ARGS) {
+  const bytea* bytes_in;
+  void* sketchptr;
+  int num_bins;
+  float* split_points;
+  float min_value, max_value, delta;
+  int i;
+
+  // output array of bins
+  Datum* result;
+  ArrayType* arr_out;
+  int16 elmlen_out;
+  bool elmbyval_out;
+  char elmalign_out;
+
+  // the one-argument SQL overload shares this entry point, so argument 1 may not exist
+  num_bins = PG_NARGS() > 1 ? PG_GETARG_INT32(1) : 0;
+  if (num_bins == 0) num_bins = DEFAULT_NUM_BINS;
+  if (num_bins < 2) {
+    elog(ERROR, "at least two bins expected");
+  }
+
+  bytes_in = PG_GETARG_BYTEA_P(0);
+  sketchptr = kll_float_sketch_deserialize(VARDATA(bytes_in), VARSIZE(bytes_in) - VARHDRSZ);
+
+  // num_bins - 1 interior split points, evenly spaced between the 0 and 1 quantiles
+  // NOTE(review): an empty sketch or min == max is not special-cased here -- confirm how the adapter reacts
+  split_points = palloc(sizeof(float) * (num_bins - 1));
+  min_value = kll_float_sketch_get_quantile(sketchptr, 0);
+  max_value = kll_float_sketch_get_quantile(sketchptr, 1);
+  delta = (max_value - min_value) / num_bins;
+  for (i = 0; i < num_bins - 1; i++) split_points[i] = min_value + delta * (i + 1);
+  result = (Datum*) kll_float_sketch_get_pmf_or_cdf(sketchptr, split_points, num_bins - 1, false, true);
+  pfree(split_points);
+  kll_float_sketch_delete(sketchptr);
+
+  // construct output array; the adapter returned num_split_points + 1 == num_bins values
+  get_typlenbyvalalign(FLOAT8OID, &elmlen_out, &elmbyval_out, &elmalign_out);
+  arr_out = construct_array(result, num_bins, FLOAT8OID, elmlen_out, elmbyval_out, elmalign_out);
+  PG_RETURN_ARRAYTYPE_P(arr_out);
+}