Merge pull request #66 from okumin/udf-descriptions-warning

Add @Description to UDFs registered in Apache Hive
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java
index d5a8e84..0aa94a7 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java
@@ -55,7 +55,7 @@
     extended = "Example:\n"
     + "> SELECT dataToSketch(val, 12) FROM src;\n"
     + "The return value is a binary blob that can be operated on by other sketch related functions."
-    + " The lgK parameter controls the sketch size and rlative error expected from the sketch."
+    + " The lgK parameter controls the sketch size and relative error expected from the sketch."
     + " It is optional and must be from 4 to 26. The default is 11, which is expected to yield errors"
     + " of roughly +-1.5% in the estimation of uniques with 95% confidence."
     + " The seed parameter is optional")
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java
index ab81a8d..98e260b 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java
@@ -51,7 +51,7 @@
     extended = "Example:\n"
     + "> SELECT UnionSketch(sketch) FROM src;\n"
     + "The return value is a binary blob that can be operated on by other sketch related functions."
-    + " The lgK parameter controls the sketch size and rlative error expected from the sketch."
+    + " The lgK parameter controls the sketch size and relative error expected from the sketch."
     + " It is optional an must be from 4 to 26. The default is 11, which is expected to yield errors"
     + " of roughly +-1.5% in the estimation of uniques with 95% confidence."
     + " The seed parameter is optional")
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDF.java b/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDF.java
index 29507b9..8f8e67e 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDF.java
@@ -24,12 +24,22 @@
 import org.apache.datasketches.cpc.CpcSketch;
 import org.apache.datasketches.cpc.CpcUnion;
 import org.apache.datasketches.hive.common.BytesWritableHelper;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 /**
  * Hive union sketch UDF.
  */
+@Description(
+    name = "unionSketch",
+    value = "_FUNC_(firstSketch, secondSketch[, lgK[, seed]]) - Compute the union of the given "
+        + "sketches with the given size and seed",
+    extended = "The return value is a binary blob that can be operated on by other sketch related functions."
+        + " The lgK parameter controls the sketch size and relative error expected from the sketch."
+        + " It is optional and must be from 4 to 26. The default is 11, which is expected to yield errors"
+        + " of roughly +-1.5% in the estimation of uniques with 95% confidence."
+        + " The seed parameter is optional")
 @SuppressWarnings("deprecation")
 public class UnionSketchUDF extends UDF {
 
diff --git a/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java
index c8b7aa6..db0f638 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java
@@ -54,7 +54,7 @@
     extended = "Example:\n"
     + "> SELECT dataToSketch(val, 12) FROM src;\n"
     + "The return value is a binary blob that can be operated on by other sketch related functions."
-    + " The lgK parameter controls the sketch size and rlative error expected from the sketch."
+    + " The lgK parameter controls the sketch size and relative error expected from the sketch."
     + " It is optional and must be from 4 to 21. The default is 12, which is expected to yield errors"
     + " of roughly +-3% in the estimation of uniques with 95% confidence."
     + " The target type parameter is optional and must be 'HLL_4', 'HLL_6' or 'HLL_8'."
diff --git a/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java
index de8b579..a91ce91 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java
@@ -50,7 +50,7 @@
     extended = "Example:\n"
     + "> SELECT UnionSketch(sketch) FROM src;\n"
     + "The return value is a binary blob that can be operated on by other sketch related functions."
-    + " The lgK parameter controls the sketch size and rlative error expected from the sketch."
+    + " The lgK parameter controls the sketch size and relative error expected from the sketch."
     + " It is optional and must be from 4 to 21. The default is 12, which is expected to yield errors"
     + " of roughly +-3% in the estimation of uniques with 95% confidence."
     + " The target type parameter is optional and must be 'HLL_4', 'HLL_6' or 'HLL_8'."
diff --git a/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDF.java b/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDF.java
index b238b9d..59da61f 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDF.java
@@ -23,12 +23,23 @@
 import org.apache.datasketches.hll.HllSketch;
 import org.apache.datasketches.hll.TgtHllType;
 import org.apache.datasketches.hll.Union;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 /**
  * Hive union sketch UDF.
  */
+@Description(
+    name = "unionSketch",
+    value = "_FUNC_(firstSketch, secondSketch[, lgK[, type]]) - Compute the union of the given "
+        + "sketches with the given size and target type",
+    extended = "The return value is a binary blob that can be operated on by other sketch related functions."
+        + " The lgK parameter controls the sketch size and relative error expected from the sketch."
+        + " It is optional and must be from 4 to 21. The default is 12, which is expected to yield errors"
+        + " of roughly +-3% in the estimation of uniques with 95% confidence."
+        + " The target type parameter is optional and must be 'HLL_4', 'HLL_6' or 'HLL_8'."
+        + " The default is 'HLL_4'")
 @SuppressWarnings("deprecation")
 public class UnionSketchUDF extends UDF {
 
diff --git a/src/main/java/org/apache/datasketches/hive/theta/EstimateSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/EstimateSketchUDF.java
index b66774b..174d03f 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/EstimateSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/EstimateSketchUDF.java
@@ -24,13 +24,17 @@
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.theta.Sketch;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 /**
  * Hive estimate sketch udf. V4
- *
  */
+@Description(
+    name = "estimateSketch",
+    value = "_FUNC_(sketch) - Return the estimated unique count of the given sketch",
+    extended = "The given sketch is a binary blob computed by other Theta Sketch UDFs")
 @SuppressWarnings("deprecation")
 public class EstimateSketchUDF extends UDF {
 
diff --git a/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java
index d61b53c..090fd70 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java
@@ -25,6 +25,7 @@
 import org.apache.datasketches.theta.AnotB;
 import org.apache.datasketches.theta.SetOperation;
 import org.apache.datasketches.theta.Sketch;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
@@ -32,6 +33,14 @@
  * Hive exclude sketch UDF. (i.e. in sketch a but not in sketch b)
  *
  */
+@Description(
+    name = "excludeSketch",
+    value = "_FUNC_(firstSketch, secondSketch[, seed]) - Computes the set difference, A-AND-NOT-B, "
+        + "of the given sketches",
+    extended = "The return value is a binary blob that contains a compact sketch, which can "
+        + "be operated on by the other sketch-related functions. "
+        + "The seed is optional, "
+        + "and using it is not recommended unless you really know why you need it.")
 @SuppressWarnings("deprecation")
 public class ExcludeSketchUDF extends UDF {
 
diff --git a/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java
index 9999ee3..603f586 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java
@@ -25,6 +25,7 @@
 import org.apache.datasketches.theta.Intersection;
 import org.apache.datasketches.theta.SetOperation;
 import org.apache.datasketches.theta.Sketch;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
@@ -32,6 +33,14 @@
  * Hive intersection sketch UDF.
  *
  */
+@Description(
+    name = "intersectSketch",
+    value = "_FUNC_(firstSketch, secondSketch[, seed]) - Compute the intersection of the "
+        + "given sketches",
+    extended = "The return value is a binary blob that contains a compact sketch, which can "
+        + "be operated on by the other sketch-related functions. "
+        + "The seed is optional, "
+        + "and using it is not recommended unless you really know why you need it.")
 @SuppressWarnings("deprecation")
 public class IntersectSketchUDF extends UDF {
 
diff --git a/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java
index db2b55f..7856287 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java
@@ -25,12 +25,25 @@
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.theta.SetOperation;
 import org.apache.datasketches.theta.Union;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 /**
  * Hive union sketch UDF.
  */
+@Description(
+    name = "unionSketch",
+    value = "_FUNC_(firstSketch, secondSketch[, size[, seed]]) - Compute the union of the given "
+        + "sketches with the given size and seed",
+    extended = "The return value is a binary blob that contains a compact sketch, which can "
+        + "be operated on by the other sketch-related functions. The optional "
+        + "size must be a power of 2, and controls the relative error of the expected "
+        + "result. A size of 16384 can be expected to yield errors of roughly +-1.5% "
+        + "in the estimation of uniques with 95% confidence. "
+        + "The default size is defined in the sketches-core library and at the time of this writing "
+        + "was 4096 (about 3% error). "
+        + "The seed is optional, and using it is not recommended unless you really know why you need it")
 @SuppressWarnings("deprecation")
 public class UnionSketchUDF extends UDF {
 
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java
index c27ea6c..e12dcb7 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java
@@ -27,6 +27,7 @@
 import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
 import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
 import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -38,6 +39,12 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.BytesWritable;
 
+@Description(
+    name = "ArrayOfDoublesSketchToValues",
+    value = "_FUNC_(sketch) - Return the list of tuple values",
+    extended = "Returns associated values of a given ArrayOfDoublesSketch as rows."
+        + " Each row will be N double values, where N is the number of double values kept in the"
+        + " sketch per key.")
 public class ArrayOfDoublesSketchToValuesUDTF extends GenericUDTF {
 
   PrimitiveObjectInspector inputObjectInspector;