Merge pull request #56 from apache/Update_to_core2.0.0
Updates required to match java core 2.0.0.
diff --git a/pom.xml b/pom.xml
index 2ca8c7d..54e3c1f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -21,6 +21,7 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -30,17 +31,14 @@
</parent>
<groupId>org.apache.datasketches</groupId>
-
- <!-- UNIQUE FOR THIS JAVA COMPONENT -->
<artifactId>datasketches-hive</artifactId>
<version>1.2.0-SNAPSHOT</version>
- <description>Apache Hive adaptors for the DataSketches library.</description>
- <!-- END: UNIQUE FOR THIS JAVA COMPONENT -->
-
- <url>https://datasketches.apache.org/</url>
+ <packaging>jar</packaging>
+
<name>${project.artifactId}</name>
+ <description>Apache Hive adaptors for the DataSketches library.</description>
+ <url>https://datasketches.apache.org/</url>
<inceptionYear>2015</inceptionYear>
- <packaging>jar</packaging> <!-- jar is the default -->
<mailingLists>
<mailingList>
@@ -85,7 +83,7 @@
<properties>
<!-- UNIQUE FOR THIS JAVA COMPONENT -->
- <datasketches-java.version>1.3.0-incubating</datasketches-java.version>
+ <datasketches-java.version>2.0.0</datasketches-java.version>
<hive-exec.version>2.3.4</hive-exec.version>
<hadoop-common.version>2.8.5</hadoop-common.version>
<slf4j-simple.version>1.7.30</slf4j-simple.version>
@@ -95,7 +93,7 @@
<testng.version>7.1.0</testng.version>
<!-- System-wide properties -->
- <maven.version>3.0.0</maven.version>
+ <maven.version>3.5.0</maven.version>
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
@@ -105,8 +103,11 @@
<project.build.resourceEncoding>${charset.encoding}</project.build.resourceEncoding>
<project.reporting.outputEncoding>${charset.encoding}</project.reporting.outputEncoding>
- <!-- org.codehaus.plexus used for strict profile testing-->
- <plexus-compiler-javac-errorprone.version>2.8.8</plexus-compiler-javac-errorprone.version>
+ <!-- org.codehaus plugins -->
+ <!-- used for strict profile testing-->
+ <plexus-compiler-javac-errorprone.version>2.8.5</plexus-compiler-javac-errorprone.version>
+ <versions-maven-plugin.version>2.8.1</versions-maven-plugin.version>
+
<!-- Maven Plugins -->
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <!-- overrides parent -->
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version> <!-- overrides parent -->
@@ -124,6 +125,7 @@
<!-- org.jacoco Maven Plugins -->
<jacoco-maven-plugin.version>0.8.6</jacoco-maven-plugin.version>
<!-- org.eluder Maven Plugins -->
+ <coveralls-repo-token></coveralls-repo-token>
<coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version>
<!-- other -->
<lifecycle-mapping.version>1.0.0</lifecycle-mapping.version>
@@ -209,6 +211,13 @@
<build>
<pluginManagement>
<plugins>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>versions-maven-plugin</artifactId>
+ <version>${versions-maven-plugin.version}</version>
+ </plugin>
+
<plugin>
<!-- We want to deploy the artifacts to a staging location for perusal -->
<!-- Apache Parent pom: apache-release profile -->
@@ -220,12 +229,14 @@
<!-- see maven-install-plugin -->
</configuration>
</plugin>
+
<plugin>
<!-- Apache Parent pom, pluginManagement-->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>${maven-release-plugin.version}</version>
</plugin>
+
<plugin>
<!-- Extends Apache Parent pom, pluginManagement-->
<groupId>org.apache.maven.plugins</groupId>
@@ -234,18 +245,21 @@
<executions>
<execution>
<id>default-jar</id>
+ <phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
<execution>
<id>default-test-jar</id>
+ <phase>package</phase>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
+
<plugin>
<!-- Extends Apache Parent pom, apache-release profile -->
<groupId>org.apache.maven.plugins</groupId>
@@ -263,6 +277,7 @@
</execution>
</executions>
</plugin>
+
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
@@ -293,6 +308,7 @@
</execution>
</executions>
</plugin>
+
<plugin>
<!-- Apache Parent pom, pluginManagement-->
<groupId>org.apache.rat</groupId>
@@ -312,13 +328,16 @@
<useDefaultExcludes>true</useDefaultExcludes>
<excludes>
<!-- rat uses .gitignore for excludes by default -->
+ <exclude>**/*.yaml</exclude>
+ <exclude>**/*.yml</exclude>
+ <exclude>**/.*</exclude>
<exclude>**/test/resources/**/*.txt</exclude>
- <exclude>.asf.yaml</exclude>
<exclude>LICENSE</exclude>
<exclude>NOTICE</exclude>
</excludes>
</configuration>
</plugin>
+
<plugin>
<!-- Extends Apache Parent pom, apache-release profile -->
<groupId>org.apache.maven.plugins</groupId>
@@ -341,6 +360,7 @@
</execution>
</executions>
</plugin>
+
<plugin>
<!-- Apache Parent pom, pluginManagement-->
<groupId>org.apache.maven.plugins</groupId>
@@ -352,6 +372,7 @@
<redirectTestOutputToFile>true</redirectTestOutputToFile>
</configuration>
</plugin>
+
<plugin>
<!-- Generates code coverage report from website. -->
<groupId>org.jacoco</groupId>
@@ -359,22 +380,30 @@
<version>${jacoco-maven-plugin.version}</version>
<executions>
<execution>
- <id>prepare-agent</id>
+ <id>default-prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
+ <execution>
+ <id>default-report</id>
+ <goals>
+ <goal>report</goal>
+ </goals>
+ </execution>
</executions>
</plugin>
+
<plugin>
<!-- Submit code coverage report to Coveralls.io. -->
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
<version>${coveralls-maven-plugin.version}</version>
<configuration>
- <!-- Since we use Travis CI we do not have to put a Coveralls token here. -->
+ <repoToken>${coveralls-repo-token}</repoToken>
</configuration>
</plugin>
+
</plugins>
</pluginManagement>
<plugins>
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java
index 732a5b8..a9d44a0 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/DataToSketchUDAF.java
@@ -41,7 +41,12 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
- * Hive UDAF to create an HllSketch from raw data.
+ * Hive UDAF to create a CpcSketch from raw data.
+ *
+ * <p><b>Note:</b> String values (STRING, CHAR and VARCHAR) in the raw data are submitted
+ * to the sketch as UTF-16 characters (a Java char[]). If the user requires a different
+ * encoding for cross-platform compatibility, it is recommended that these values be encoded prior
+ * to being submitted and then typed as a BINARY byte[].</p>
*/
@Description(
name = "dataToSketch",
@@ -51,7 +56,7 @@
+ "> SELECT dataToSketch(val, 12) FROM src;\n"
+ "The return value is a binary blob that can be operated on by other sketch related functions."
+ " The lgK parameter controls the sketch size and rlative error expected from the sketch."
- + " It is optional an must be from 4 to 26. The default is 11, which is expected to yield errors"
+ + " It is optional and must be from 4 to 26. The default is 11, which is expected to yield errors"
+ " of roughly +-1.5% in the estimation of uniques with 95% confidence."
+ " The seed parameter is optional")
public class DataToSketchUDAF extends AbstractGenericUDAFResolver {
@@ -119,7 +124,7 @@
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
// Different State is used for the iterate phase and the merge phase.
// SketchState is more space-efficient, so let's use SketchState if possible.
- if ((mode_ == Mode.PARTIAL1) || (mode_ == Mode.COMPLETE)) { // iterate() will be used
+ if (this.mode_ == Mode.PARTIAL1 || this.mode_ == Mode.COMPLETE) { // iterate() will be used
return new SketchState();
}
return new UnionState();
@@ -136,22 +141,22 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- mode_ = mode;
- if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
+ this.mode_ = mode;
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
- inputInspector_ = (PrimitiveObjectInspector) parameters[0];
+ this.inputInspector_ = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
+ this.lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
- seedInspector_ = (PrimitiveObjectInspector) parameters[2];
+ this.seedInspector_ = (PrimitiveObjectInspector) parameters[2];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateInspector_ = (StructObjectInspector) parameters[0];
+ this.intermediateInspector_ = (StructObjectInspector) parameters[0];
}
- if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
// intermediate results need to include the lgK and the seed
return ObjectInspectorFactory.getStandardStructObjectInspector(
Arrays.asList(LG_K_FIELD, SEED_FIELD, SKETCH_FIELD),
@@ -176,24 +181,25 @@
* java.lang.Object[])
*/
@Override
- public void iterate(final @SuppressWarnings("deprecation") AggregationBuffer agg,
+ @SuppressWarnings("deprecation")
+ public void iterate(final AggregationBuffer agg,
final Object[] parameters) throws HiveException {
if (parameters[0] == null) { return; }
final SketchState state = (SketchState) agg;
if (!state.isInitialized()) {
initializeState(state, parameters);
}
- state.update(parameters[0], inputInspector_);
+ state.update(parameters[0], this.inputInspector_);
}
private void initializeState(final State state, final Object[] parameters) {
int lgK = DEFAULT_LG_K;
- if (lgKInspector_ != null) {
- lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], lgKInspector_);
+ if (this.lgKInspector_ != null) {
+ lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.lgKInspector_);
}
long seed = DEFAULT_UPDATE_SEED;
- if (seedInspector_ != null) {
- seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], seedInspector_);
+ if (this.seedInspector_ != null) {
+ seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], this.seedInspector_);
}
state.init(lgK, seed);
}
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/SketchEvaluator.java b/src/main/java/org/apache/datasketches/hive/cpc/SketchEvaluator.java
index ca2e0a9..b07505e 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/SketchEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/SketchEvaluator.java
@@ -61,23 +61,24 @@
}
@Override
- public void merge(final @SuppressWarnings("deprecation") AggregationBuffer buf, final Object data)
+ @SuppressWarnings("deprecation")
+ public void merge(final AggregationBuffer buf, final Object data)
throws HiveException {
if (data == null) { return; }
final UnionState state = (UnionState) buf;
if (!state.isInitialized()) {
initializeState(state, data);
}
- final BytesWritable serializedSketch = (BytesWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
+ final BytesWritable serializedSketch = (BytesWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
state.update(CpcSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch), state.getSeed()));
}
private void initializeState(final UnionState state, final Object data) {
- final int lgK = ((IntWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(LG_K_FIELD))).get();
- final long seed = ((LongWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SEED_FIELD))).get();
+ final int lgK = ((IntWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(LG_K_FIELD))).get();
+ final long seed = ((LongWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SEED_FIELD))).get();
state.init(lgK, seed);
}
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/SketchState.java b/src/main/java/org/apache/datasketches/hive/cpc/SketchState.java
index 5206fe7..37911cc 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/SketchState.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/SketchState.java
@@ -29,47 +29,47 @@
@Override
boolean isInitialized() {
- return sketch_ != null;
+ return this.sketch_ != null;
}
@Override
void init(final int logK, final long seed) {
super.init(logK, seed);
- sketch_ = new CpcSketch(logK, seed);
+ this.sketch_ = new CpcSketch(logK, seed);
}
void update(final Object data, final PrimitiveObjectInspector objectInspector) {
switch (objectInspector.getPrimitiveCategory()) {
case BINARY:
- sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data, objectInspector)
.copyBytes());
return;
case BYTE:
- sketch_.update(PrimitiveObjectInspectorUtils.getByte(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getByte(data, objectInspector));
return;
case DOUBLE:
- sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data, objectInspector));
return;
case FLOAT:
- sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data, objectInspector));
return;
case INT:
- sketch_.update(PrimitiveObjectInspectorUtils.getInt(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getInt(data, objectInspector));
return;
case LONG:
- sketch_.update(PrimitiveObjectInspectorUtils.getLong(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getLong(data, objectInspector));
return;
case STRING:
// conversion to char[] avoids costly UTF-8 encoding
- sketch_.update(PrimitiveObjectInspectorUtils.getString(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getString(data, objectInspector)
.toCharArray());
return;
case CHAR:
- sketch_.update(PrimitiveObjectInspectorUtils.getHiveChar(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getHiveChar(data, objectInspector)
.getValue().toCharArray());
return;
case VARCHAR:
- sketch_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(data, objectInspector)
.getValue().toCharArray());
return;
default:
@@ -82,13 +82,13 @@
@Override
CpcSketch getResult() {
- if (sketch_ == null) { return null; }
- return sketch_;
+ if (this.sketch_ == null) { return null; }
+ return this.sketch_;
}
@Override
void reset() {
- sketch_ = null;
+ this.sketch_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/State.java b/src/main/java/org/apache/datasketches/hive/cpc/State.java
index f3b9e1e..c9bcaee 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/State.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/State.java
@@ -28,16 +28,16 @@
private long seed_;
void init(final int lgK, final long seed) {
- lgK_ = lgK;
- seed_ = seed;
+ this.lgK_ = lgK;
+ this.seed_ = seed;
}
int getLgK() {
- return lgK_;
+ return this.lgK_;
}
long getSeed() {
- return seed_;
+ return this.seed_;
}
abstract boolean isInitialized();
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java
index 7e22c3d..726ceb8 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/UnionSketchUDAF.java
@@ -130,16 +130,16 @@
super.init(mode, parameters);
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
- inputInspector_ = (PrimitiveObjectInspector) parameters[0];
+ this.inputInspector_ = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
+ this.lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
- seedInspector_ = (PrimitiveObjectInspector) parameters[2];
+ this.seedInspector_ = (PrimitiveObjectInspector) parameters[2];
}
} else {
// mode = partial2 || final
- intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
+ this.intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
}
if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
@@ -173,19 +173,19 @@
if (!state.isInitialized()) {
initializeState(state, parameters);
}
- final byte[] serializedSketch = (byte[]) inputInspector_.getPrimitiveJavaObject(parameters[0]);
+ final byte[] serializedSketch = (byte[]) this.inputInspector_.getPrimitiveJavaObject(parameters[0]);
if (serializedSketch == null) { return; }
state.update(CpcSketch.heapify(Memory.wrap(serializedSketch), state.getSeed()));
}
private void initializeState(final UnionState state, final Object[] parameters) {
int lgK = DEFAULT_LG_K;
- if (lgKInspector_ != null) {
- lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], lgKInspector_);
+ if (this.lgKInspector_ != null) {
+ lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.lgKInspector_);
}
long seed = DEFAULT_UPDATE_SEED;
- if (seedInspector_ != null) {
- seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], seedInspector_);
+ if (this.seedInspector_ != null) {
+ seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], this.seedInspector_);
}
state.init(lgK, seed);
}
diff --git a/src/main/java/org/apache/datasketches/hive/cpc/UnionState.java b/src/main/java/org/apache/datasketches/hive/cpc/UnionState.java
index 4ca1006..5fe0b67 100644
--- a/src/main/java/org/apache/datasketches/hive/cpc/UnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/cpc/UnionState.java
@@ -28,28 +28,28 @@
@Override
boolean isInitialized() {
- return union_ != null;
+ return this.union_ != null;
}
@Override
void init(final int lgK, final long seed) {
super.init(lgK, seed);
- union_ = new CpcUnion(lgK, seed);
+ this.union_ = new CpcUnion(lgK, seed);
}
void update(final CpcSketch sketch) {
- union_.update(sketch);
+ this.union_.update(sketch);
}
@Override
CpcSketch getResult() {
- if (union_ == null) { return null; }
- return union_.getResult();
+ if (this.union_ == null) { return null; }
+ return this.union_.getResult();
}
@Override
void reset() {
- union_ = null;
+ this.union_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java
index 93da5d6..714f59b 100644
--- a/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java
@@ -84,7 +84,7 @@
// In PARTIAL2 and FINAL mode, the parameters are just partial aggregations.
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
if (parameters.length > 1) {
- maxMapSizeObjectInspector = (PrimitiveObjectInspector) parameters[1];
+ this.maxMapSizeObjectInspector = (PrimitiveObjectInspector) parameters[1];
}
}
@@ -98,10 +98,10 @@
@SuppressWarnings("unchecked")
final ItemsState<T> state = (ItemsState<T>) buf;
if (!state.isInitialized()) {
- final int maxMapSize = PrimitiveObjectInspectorUtils.getInt(data[1], maxMapSizeObjectInspector);
+ final int maxMapSize = PrimitiveObjectInspectorUtils.getInt(data[1], this.maxMapSizeObjectInspector);
state.init(maxMapSize);
}
- state.update(extractValue(data[0], inputObjectInspector));
+ state.update(extractValue(data[0], this.inputObjectInspector));
}
public abstract T extractValue(final Object data, final ObjectInspector objectInspector)
diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java
index 39d8303..1c87f2d 100644
--- a/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java
@@ -49,7 +49,7 @@
@Override
public String extractValue(final Object data, final ObjectInspector objectInspector)
throws HiveException {
- final Object value = inputObjectInspector.getPrimitiveJavaObject(data);
+ final Object value = this.inputObjectInspector.getPrimitiveJavaObject(data);
if (value instanceof String) {
return (String) value;
} else if (value instanceof HiveChar) {
diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java b/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java
index 25a4021..218bf74 100644
--- a/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java
+++ b/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java
@@ -58,10 +58,10 @@
throw new UDFArgumentTypeException(0, "Primitive argument expected, but "
+ inspectors[0].getCategory().name() + " was recieved");
}
- inputObjectInspector = (PrimitiveObjectInspector) inspectors[0];
- if (inputObjectInspector.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
+ this.inputObjectInspector = (PrimitiveObjectInspector) inspectors[0];
+ if (this.inputObjectInspector.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
throw new UDFArgumentTypeException(0, "Binary value expected as the first argument, but "
- + inputObjectInspector.getPrimitiveCategory().name() + " was recieved");
+ + this.inputObjectInspector.getPrimitiveCategory().name() + " was recieved");
}
if (inspectors.length > 1) {
@@ -69,11 +69,11 @@
throw new UDFArgumentTypeException(1, "Primitive argument expected, but "
+ inspectors[1].getCategory().name() + " was recieved");
}
- errorTypeObjectInspector = (PrimitiveObjectInspector) inspectors[1];
- if (errorTypeObjectInspector.getPrimitiveCategory()
+ this.errorTypeObjectInspector = (PrimitiveObjectInspector) inspectors[1];
+ if (this.errorTypeObjectInspector.getPrimitiveCategory()
!= PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentTypeException(1, "String value expected as the first argument, but "
- + errorTypeObjectInspector.getPrimitiveCategory().name() + " was recieved");
+ + this.errorTypeObjectInspector.getPrimitiveCategory().name() + " was recieved");
}
}
@@ -92,12 +92,12 @@
public void process(final Object[] data) throws HiveException {
if (data == null || data[0] == null) { return; }
final BytesWritable serializedSketch =
- (BytesWritable) inputObjectInspector.getPrimitiveWritableObject(data[0]);
+ (BytesWritable) this.inputObjectInspector.getPrimitiveWritableObject(data[0]);
final ItemsSketch<String> sketch = ItemsSketch.getInstance(
BytesWritableHelper.wrapAsMemory(serializedSketch), new ArrayOfStringsSerDe());
ErrorType errorType = ErrorType.NO_FALSE_POSITIVES;
if (data.length > 1) {
- errorType = ErrorType.valueOf((String) errorTypeObjectInspector.getPrimitiveJavaObject(data[1]));
+ errorType = ErrorType.valueOf((String) this.errorTypeObjectInspector.getPrimitiveJavaObject(data[1]));
}
final ItemsSketch.Row<String>[] result = sketch.getFrequentItems(errorType);
for (int i = 0; i < result.length; i++) {
diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java
index 4d5a629..c3154a4 100644
--- a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java
@@ -37,13 +37,13 @@
protected PrimitiveObjectInspector inputObjectInspector;
ItemsEvaluator(final ArrayOfItemsSerDe<T> serDe) {
- serDe_ = serDe;
+ this.serDe_ = serDe;
}
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
+ this.inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
@@ -68,7 +68,7 @@
@SuppressWarnings("unchecked")
final ItemsState<T> state = (ItemsState<T>) buf;
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) inputObjectInspector.getPrimitiveWritableObject(data));
+ (BytesWritable) this.inputObjectInspector.getPrimitiveWritableObject(data));
state.update(serializedSketch);
}
@@ -79,13 +79,13 @@
final ItemsState<T> state = (ItemsState<T>) buf;
final ItemsSketch<T> resultSketch = state.getResult();
if (resultSketch == null) { return null; }
- return new BytesWritable(resultSketch.toByteArray(serDe_));
+ return new BytesWritable(resultSketch.toByteArray(this.serDe_));
}
@SuppressWarnings("deprecation")
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- return new ItemsState<>(serDe_);
+ return new ItemsState<>(this.serDe_);
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java
index 09478c8..08d0243 100644
--- a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java
+++ b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java
@@ -31,39 +31,39 @@
private ItemsSketch<T> sketch;
ItemsState(final ArrayOfItemsSerDe<T> serDe) {
- serDe_ = serDe;
+ this.serDe_ = serDe;
}
// initializing maxMapSize is needed for building sketches using update(value)
// not needed for merging sketches using update(sketch)
void init(final int maxMapSize) {
- maxMapSize_ = maxMapSize;
- sketch = new ItemsSketch<>(maxMapSize_);
+ this.maxMapSize_ = maxMapSize;
+ this.sketch = new ItemsSketch<>(this.maxMapSize_);
}
boolean isInitialized() {
- return sketch != null;
+ return this.sketch != null;
}
void update(final T value) {
- sketch.update(value);
+ this.sketch.update(value);
}
void update(final Memory serializedSketch) {
- final ItemsSketch<T> incomingSketch = ItemsSketch.getInstance(serializedSketch, serDe_);
- if (sketch == null) {
- sketch = incomingSketch;
+ final ItemsSketch<T> incomingSketch = ItemsSketch.getInstance(serializedSketch, this.serDe_);
+ if (this.sketch == null) {
+ this.sketch = incomingSketch;
} else {
- sketch.merge(incomingSketch);
+ this.sketch.merge(incomingSketch);
}
}
public ItemsSketch<T> getResult() {
- return sketch;
+ return this.sketch;
}
void reset() {
- sketch = null;
+ this.sketch = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java
index 6a4da67..e952eb1 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/DataToSketchUDAF.java
@@ -41,6 +41,11 @@
/**
* Hive UDAF to create an HllSketch from raw data.
+ *
+ * <p><b>Note:</b> String values (STRING, CHAR and VARCHAR) in the raw data are submitted
+ * to the sketch as UTF-16 characters (a Java char[]). If the user requires a different
+ * encoding for cross-platform compatibility, it is recommended that these values be encoded prior
+ * to being submitted and then typed as a BINARY byte[].</p>
*/
@Description(
name = "dataToSketch",
@@ -123,7 +128,7 @@
// so the mode_ was null. A solution was implemented to have UnionState, which can work
// in both cases, but SketchState is more space-efficient.
// HLL sketch is about compactness, so let's use SketchState if possible.
- if (mode_ == Mode.PARTIAL1 || mode_ == Mode.COMPLETE) { // iterate() will be used
+ if (this.mode_ == Mode.PARTIAL1 || this.mode_ == Mode.COMPLETE) { // iterate() will be used
return new SketchState();
}
return new UnionState();
@@ -140,19 +145,19 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- mode_ = mode;
+ this.mode_ = mode;
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
- inputInspector_ = (PrimitiveObjectInspector) parameters[0];
+ this.inputInspector_ = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
+ this.lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
- hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
+ this.hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateInspector_ = (StructObjectInspector) parameters[0];
+ this.intermediateInspector_ = (StructObjectInspector) parameters[0];
}
if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
@@ -187,17 +192,17 @@
if (!state.isInitialized()) {
initializeState(state, parameters);
}
- state.update(parameters[0], inputInspector_);
+ state.update(parameters[0], this.inputInspector_);
}
private void initializeState(final State state, final Object[] parameters) {
int lgK = DEFAULT_LG_K;
- if (lgKInspector_ != null) {
- lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], lgKInspector_);
+ if (this.lgKInspector_ != null) {
+ lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.lgKInspector_);
}
TgtHllType type = DEFAULT_HLL_TYPE;
- if (hllTypeInspector_ != null) {
- type = TgtHllType.valueOf(PrimitiveObjectInspectorUtils.getString(parameters[2], hllTypeInspector_));
+ if (this.hllTypeInspector_ != null) {
+ type = TgtHllType.valueOf(PrimitiveObjectInspectorUtils.getString(parameters[2], this.hllTypeInspector_));
}
state.init(lgK, type);
}
diff --git a/src/main/java/org/apache/datasketches/hive/hll/SketchEvaluator.java b/src/main/java/org/apache/datasketches/hive/hll/SketchEvaluator.java
index 083f9ac..3cb63b9 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/SketchEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/SketchEvaluator.java
@@ -70,16 +70,16 @@
if (!state.isInitialized()) {
initializeState(state, data);
}
- final BytesWritable serializedSketch = (BytesWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
+ final BytesWritable serializedSketch = (BytesWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
state.update(HllSketch.wrap(BytesWritableHelper.wrapAsMemory(serializedSketch)));
}
private void initializeState(final UnionState state, final Object data) {
- final int lgK = ((IntWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(LG_K_FIELD))).get();
- final TgtHllType type = TgtHllType.valueOf(((Text) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(HLL_TYPE_FIELD))).toString());
+ final int lgK = ((IntWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(LG_K_FIELD))).get();
+ final TgtHllType type = TgtHllType.valueOf(((Text) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(HLL_TYPE_FIELD))).toString());
state.init(lgK, type);
}
diff --git a/src/main/java/org/apache/datasketches/hive/hll/SketchState.java b/src/main/java/org/apache/datasketches/hive/hll/SketchState.java
index c002a47..8cf58d6 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/SketchState.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/SketchState.java
@@ -30,48 +30,48 @@
@Override
boolean isInitialized() {
- return sketch_ != null;
+ return this.sketch_ != null;
}
@Override
void init(final int logK, final TgtHllType type) {
super.init(logK, type);
- sketch_ = new HllSketch(logK, type);
+ this.sketch_ = new HllSketch(logK, type);
}
@Override
void update(final Object data, final PrimitiveObjectInspector objectInspector) {
switch (objectInspector.getPrimitiveCategory()) {
case BINARY:
- sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data, objectInspector)
.copyBytes());
return;
case BYTE:
- sketch_.update(PrimitiveObjectInspectorUtils.getByte(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getByte(data, objectInspector));
return;
case DOUBLE:
- sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data, objectInspector));
return;
case FLOAT:
- sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data, objectInspector));
return;
case INT:
- sketch_.update(PrimitiveObjectInspectorUtils.getInt(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getInt(data, objectInspector));
return;
case LONG:
- sketch_.update(PrimitiveObjectInspectorUtils.getLong(data, objectInspector));
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getLong(data, objectInspector));
return;
case STRING:
// conversion to char[] avoids costly UTF-8 encoding
- sketch_.update(PrimitiveObjectInspectorUtils.getString(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getString(data, objectInspector)
.toCharArray());
return;
case CHAR:
- sketch_.update(PrimitiveObjectInspectorUtils.getHiveChar(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getHiveChar(data, objectInspector)
.getValue().toCharArray());
return;
case VARCHAR:
- sketch_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(data, objectInspector)
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(data, objectInspector)
.getValue().toCharArray());
return;
default:
@@ -84,13 +84,13 @@
@Override
HllSketch getResult() {
- if (sketch_ == null) { return null; }
- return sketch_;
+ if (this.sketch_ == null) { return null; }
+ return this.sketch_;
}
@Override
void reset() {
- sketch_ = null;
+ this.sketch_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/hll/State.java b/src/main/java/org/apache/datasketches/hive/hll/State.java
index fa9e5df..c6e43ca 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/State.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/State.java
@@ -30,16 +30,16 @@
private TgtHllType type_;
void init(final int lgK, final TgtHllType type) {
- lgK_ = lgK;
- type_ = type;
+ this.lgK_ = lgK;
+ this.type_ = type;
}
int getLgK() {
- return lgK_;
+ return this.lgK_;
}
TgtHllType getType() {
- return type_;
+ return this.type_;
}
abstract boolean isInitialized();
diff --git a/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java
index 7563fc9..ac96c9f 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/UnionSketchUDAF.java
@@ -130,16 +130,16 @@
super.init(mode, parameters);
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- inputInspector_ = (PrimitiveObjectInspector) parameters[0];
+ this.inputInspector_ = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
+ this.lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
- hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
+ this.hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
}
} else {
// mode = partial2 || final
- intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
+ this.intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
}
if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
@@ -173,19 +173,19 @@
if (!state.isInitialized()) {
initializeState(state, parameters);
}
- final byte[] serializedSketch = (byte[]) inputInspector_.getPrimitiveJavaObject(parameters[0]);
+ final byte[] serializedSketch = (byte[]) this.inputInspector_.getPrimitiveJavaObject(parameters[0]);
if (serializedSketch == null) { return; }
state.update(HllSketch.wrap(Memory.wrap(serializedSketch)));
}
private void initializeState(final UnionState state, final Object[] parameters) {
int lgK = DEFAULT_LG_K;
- if (lgKInspector_ != null) {
- lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], lgKInspector_);
+ if (this.lgKInspector_ != null) {
+ lgK = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.lgKInspector_);
}
TgtHllType type = DEFAULT_HLL_TYPE;
- if (hllTypeInspector_ != null) {
- type = TgtHllType.valueOf(PrimitiveObjectInspectorUtils.getString(parameters[2], hllTypeInspector_));
+ if (this.hllTypeInspector_ != null) {
+ type = TgtHllType.valueOf(PrimitiveObjectInspectorUtils.getString(parameters[2], this.hllTypeInspector_));
}
state.init(lgK, type);
}
diff --git a/src/main/java/org/apache/datasketches/hive/hll/UnionState.java b/src/main/java/org/apache/datasketches/hive/hll/UnionState.java
index 89d8e06..42ff126 100644
--- a/src/main/java/org/apache/datasketches/hive/hll/UnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/hll/UnionState.java
@@ -31,48 +31,48 @@
@Override
boolean isInitialized() {
- return union_ != null;
+ return this.union_ != null;
}
@Override
void init(final int lgK, final TgtHllType type) {
super.init(lgK, type);
- union_ = new Union(lgK);
+ this.union_ = new Union(lgK);
}
@Override
void update(final Object data, final PrimitiveObjectInspector objectInspector) {
switch (objectInspector.getPrimitiveCategory()) {
case BINARY:
- union_.update(PrimitiveObjectInspectorUtils.getBinary(data, objectInspector)
+ this.union_.update(PrimitiveObjectInspectorUtils.getBinary(data, objectInspector)
.copyBytes());
return;
case BYTE:
- union_.update(PrimitiveObjectInspectorUtils.getByte(data, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getByte(data, objectInspector));
return;
case DOUBLE:
- union_.update(PrimitiveObjectInspectorUtils.getDouble(data, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getDouble(data, objectInspector));
return;
case FLOAT:
- union_.update(PrimitiveObjectInspectorUtils.getFloat(data, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getFloat(data, objectInspector));
return;
case INT:
- union_.update(PrimitiveObjectInspectorUtils.getInt(data, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getInt(data, objectInspector));
return;
case LONG:
- union_.update(PrimitiveObjectInspectorUtils.getLong(data, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getLong(data, objectInspector));
return;
case STRING:
// conversion to char[] avoids costly UTF-8 encoding
- union_.update(PrimitiveObjectInspectorUtils.getString(data, objectInspector)
+ this.union_.update(PrimitiveObjectInspectorUtils.getString(data, objectInspector)
.toCharArray());
return;
case CHAR:
- union_.update(PrimitiveObjectInspectorUtils.getHiveChar(data, objectInspector)
+ this.union_.update(PrimitiveObjectInspectorUtils.getHiveChar(data, objectInspector)
.getValue().toCharArray());
return;
case VARCHAR:
- union_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(data, objectInspector)
+ this.union_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(data, objectInspector)
.getValue().toCharArray());
return;
default:
@@ -84,18 +84,18 @@
}
void update(final HllSketch sketch) {
- union_.update(sketch);
+ this.union_.update(sketch);
}
@Override
HllSketch getResult() {
- if (union_ == null) { return null; }
- return union_.getResult(getType());
+ if (this.union_ == null) { return null; }
+ return this.union_.getResult(getType());
}
@Override
void reset() {
- union_ = null;
+ this.union_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/kll/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/kll/DataToSketchUDAF.java
index f77db2a..bb7e2a5 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/DataToSketchUDAF.java
@@ -62,13 +62,13 @@
if (data[0] == null) { return; }
final SketchState state = (SketchState) buf;
if (!state.isInitialized()) {
- if (kInspector_ != null) {
- state.init(PrimitiveObjectInspectorUtils.getInt(data[1], kInspector_));
+ if (this.kInspector_ != null) {
+ state.init(PrimitiveObjectInspectorUtils.getInt(data[1], this.kInspector_));
} else {
state.init();
}
}
- final float value = (float) inputInspector_.getPrimitiveJavaObject(data[0]);
+ final float value = (float) this.inputInspector_.getPrimitiveJavaObject(data[0]);
state.update(value);
}
diff --git a/src/main/java/org/apache/datasketches/hive/kll/SketchEvaluator.java b/src/main/java/org/apache/datasketches/hive/kll/SketchEvaluator.java
index d5ec827..c71d88a 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/SketchEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/SketchEvaluator.java
@@ -38,14 +38,14 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- inputInspector_ = (PrimitiveObjectInspector) parameters[0];
+ this.inputInspector_ = (PrimitiveObjectInspector) parameters[0];
// Parameters:
// In PARTIAL1 and COMPLETE mode, the parameters are original data.
// In PARTIAL2 and FINAL mode, the parameters are partial aggregations.
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
if (parameters.length > 1) {
- kInspector_ = (PrimitiveObjectInspector) parameters[1];
+ this.kInspector_ = (PrimitiveObjectInspector) parameters[1];
}
}
@@ -71,7 +71,7 @@
if (data == null) { return; }
final SketchState state = (SketchState) buf;
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) inputInspector_.getPrimitiveWritableObject(data));
+ (BytesWritable) this.inputInspector_.getPrimitiveWritableObject(data));
state.update(KllFloatsSketch.heapify(serializedSketch));
}
diff --git a/src/main/java/org/apache/datasketches/hive/kll/SketchState.java b/src/main/java/org/apache/datasketches/hive/kll/SketchState.java
index d8e2003..667183e 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/SketchState.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/SketchState.java
@@ -28,35 +28,35 @@
// initialization is needed in the first phase (iterate) only
void init() {
- state_ = new KllFloatsSketch();
+ this.state_ = new KllFloatsSketch();
}
void init(final int k) {
- state_ = new KllFloatsSketch(k);
+ this.state_ = new KllFloatsSketch(k);
}
boolean isInitialized() {
- return state_ != null;
+ return this.state_ != null;
}
void update(final float value) {
- state_.update(value);
+ this.state_.update(value);
}
void update(final KllFloatsSketch sketch) {
- if (state_ == null) {
- state_ = sketch;
+ if (this.state_ == null) {
+ this.state_ = sketch;
} else {
- state_.merge(sketch);
+ this.state_.merge(sketch);
}
}
public KllFloatsSketch getResult() {
- return state_;
+ return this.state_;
}
void reset() {
- state_ = null;
+ this.state_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/kll/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/kll/UnionSketchUDAF.java
index 0fa69bd..86692f1 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/UnionSketchUDAF.java
@@ -57,8 +57,8 @@
if (data[0] == null) { return; }
final SketchState state = (SketchState) buf;
if (!state.isInitialized()) {
- if (kInspector_ != null) {
- state.init(PrimitiveObjectInspectorUtils.getInt(data[1], kInspector_));
+ if (this.kInspector_ != null) {
+ state.init(PrimitiveObjectInspectorUtils.getInt(data[1], this.kInspector_));
} else {
state.init();
}
diff --git a/src/main/java/org/apache/datasketches/hive/kll/Util.java b/src/main/java/org/apache/datasketches/hive/kll/Util.java
index 1886ef7..deb73f8 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/Util.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/Util.java
@@ -41,13 +41,13 @@
}
static List<Float> primitivesToList(final float[] array) {
- final List<Float> result = new ArrayList<Float>(array.length);
+ final List<Float> result = new ArrayList<>(array.length);
for (float item: array) { result.add(item); }
return result;
}
static List<Double> primitivesToList(final double[] array) {
- final List<Double> result = new ArrayList<Double>(array.length);
+ final List<Double> result = new ArrayList<>(array.length);
for (double item: array) { result.add(item); }
return result;
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DataToDoublesSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/DataToDoublesSketchUDAF.java
index 8860da6..3d11269 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/DataToDoublesSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/DataToDoublesSketchUDAF.java
@@ -61,11 +61,11 @@
public void iterate(final AggregationBuffer buf, final Object[] data) throws HiveException {
if (data[0] == null) { return; }
final DoublesUnionState state = (DoublesUnionState) buf;
- if (!state.isInitialized() && (kObjectInspector != null)) {
- final int k = PrimitiveObjectInspectorUtils.getInt(data[1], kObjectInspector);
+ if (!state.isInitialized() && (this.kObjectInspector != null)) {
+ final int k = PrimitiveObjectInspectorUtils.getInt(data[1], this.kObjectInspector);
state.init(k);
}
- final double value = (double) inputObjectInspector.getPrimitiveJavaObject(data[0]);
+ final double value = (double) this.inputObjectInspector.getPrimitiveJavaObject(data[0]);
state.update(value);
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java
index b7424a5..d69c58b 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java
@@ -66,11 +66,11 @@
if (data[0] == null) { return; }
@SuppressWarnings("unchecked")
final ItemsUnionState<T> state = (ItemsUnionState<T>) buf;
- if (!state.isInitialized() && (kObjectInspector != null)) {
- final int k = PrimitiveObjectInspectorUtils.getInt(data[1], kObjectInspector);
+ if (!state.isInitialized() && (this.kObjectInspector != null)) {
+ final int k = PrimitiveObjectInspectorUtils.getInt(data[1], this.kObjectInspector);
state.init(k);
}
- state.update(extractValue(data[0], inputObjectInspector));
+ state.update(extractValue(data[0], this.inputObjectInspector));
}
public abstract T extractValue(final Object data, final ObjectInspector objectInspector)
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java
index 4b21eda..ca495dd 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java
@@ -52,7 +52,7 @@
@Override
public String extractValue(final Object data, final ObjectInspector objectInspector)
throws HiveException {
- final Object value = inputObjectInspector.getPrimitiveJavaObject(data);
+ final Object value = this.inputObjectInspector.getPrimitiveJavaObject(data);
if (value instanceof String) {
return (String) value;
} else if (value instanceof HiveChar) {
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DoublesEvaluator.java b/src/main/java/org/apache/datasketches/hive/quantiles/DoublesEvaluator.java
index 62b7947..e893498 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/DoublesEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/DoublesEvaluator.java
@@ -38,14 +38,14 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
+ this.inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
// Parameters:
// In PARTIAL1 and COMPLETE mode, the parameters are original data.
// In PARTIAL2 and FINAL mode, the parameters are partial aggregations.
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
if (parameters.length > 1) {
- kObjectInspector = (PrimitiveObjectInspector) parameters[1];
+ this.kObjectInspector = (PrimitiveObjectInspector) parameters[1];
}
}
@@ -71,7 +71,7 @@
if (data == null) { return; }
final DoublesUnionState state = (DoublesUnionState) buf;
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) inputObjectInspector.getPrimitiveWritableObject(data));
+ (BytesWritable) this.inputObjectInspector.getPrimitiveWritableObject(data));
state.update(serializedSketch);
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java b/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java
index 653f2ce..cb6f03c 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java
@@ -33,36 +33,36 @@
void init(final int k) {
final DoublesUnionBuilder unionBuilder = DoublesUnion.builder();
if (k > 0) { unionBuilder.setMaxK(k); }
- union = unionBuilder.build();
+ this.union = unionBuilder.build();
}
boolean isInitialized() {
- return union != null;
+ return this.union != null;
}
void update(final double value) {
- if (union == null) {
- union = DoublesUnion.builder().build();
+ if (this.union == null) {
+ this.union = DoublesUnion.builder().build();
}
- union.update(value);
+ this.union.update(value);
}
void update(final Memory serializedSketch) {
final DoublesSketch incomingSketch = DoublesSketch.wrap(serializedSketch);
- if (union == null) {
- union = DoublesUnion.heapify(incomingSketch);
+ if (this.union == null) {
+ this.union = DoublesUnion.heapify(incomingSketch);
} else {
- union.update(incomingSketch);
+ this.union.update(incomingSketch);
}
}
public DoublesSketch getResult() {
- if (union == null) { return null; }
- return union.getResultAndReset();
+ if (this.union == null) { return null; }
+ return this.union.getResultAndReset();
}
void reset() {
- union = null;
+ this.union = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java
index b2343ef..4713974 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java
@@ -41,21 +41,21 @@
protected PrimitiveObjectInspector kObjectInspector;
ItemsEvaluator(final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) {
- comparator_ = comparator;
- serDe_ = serDe;
+ this.comparator_ = comparator;
+ this.serDe_ = serDe;
}
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
+ this.inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
// Parameters:
// In PARTIAL1 and COMPLETE mode, the parameters are original data.
// In PARTIAL2 and FINAL mode, the parameters are partial aggregations.
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
if (parameters.length > 1) {
- kObjectInspector = (PrimitiveObjectInspector) parameters[1];
+ this.kObjectInspector = (PrimitiveObjectInspector) parameters[1];
}
}
@@ -83,7 +83,7 @@
@SuppressWarnings("unchecked")
final ItemsUnionState<T> state = (ItemsUnionState<T>) buf;
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) inputObjectInspector.getPrimitiveWritableObject(data));
+ (BytesWritable) this.inputObjectInspector.getPrimitiveWritableObject(data));
state.update(serializedSketch);
}
@@ -94,13 +94,13 @@
final ItemsUnionState<T> state = (ItemsUnionState<T>) buf;
final ItemsSketch<T> resultSketch = state.getResult();
if (resultSketch == null) { return null; }
- return new BytesWritable(resultSketch.toByteArray(serDe_));
+ return new BytesWritable(resultSketch.toByteArray(this.serDe_));
}
@SuppressWarnings("deprecation")
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- return new ItemsUnionState<T>(comparator_, serDe_);
+ return new ItemsUnionState<>(this.comparator_, this.serDe_);
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java
index 9e7ee31..b7004d3 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java
@@ -34,47 +34,47 @@
private ItemsUnion<T> union;
ItemsUnionState(final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) {
- comparator_ = comparator;
- serDe_ = serDe;
+ this.comparator_ = comparator;
+ this.serDe_ = serDe;
}
// initializing is needed only in the first phase (iterate)
void init(final int k) {
if (k > 0) {
- union = ItemsUnion.getInstance(k, comparator_);
+ this.union = ItemsUnion.getInstance(k, this.comparator_);
} else {
- union = ItemsUnion.getInstance(comparator_);
+ this.union = ItemsUnion.getInstance(this.comparator_);
}
}
boolean isInitialized() {
- return union != null;
+ return this.union != null;
}
void update(final T value) {
- if (union == null) {
- union = ItemsUnion.getInstance(comparator_);
+ if (this.union == null) {
+ this.union = ItemsUnion.getInstance(this.comparator_);
}
- union.update(value);
+ this.union.update(value);
}
void update(final Memory serializedSketch) {
final ItemsSketch<T> incomingSketch =
- ItemsSketch.getInstance(serializedSketch, comparator_, serDe_);
- if (union == null) {
- union = ItemsUnion.getInstance(incomingSketch);
+ ItemsSketch.getInstance(serializedSketch, this.comparator_, this.serDe_);
+ if (this.union == null) {
+ this.union = ItemsUnion.getInstance(incomingSketch);
} else {
- union.update(incomingSketch);
+ this.union.update(incomingSketch);
}
}
public ItemsSketch<T> getResult() {
- if (union == null) { return null; }
- return union.getResultAndReset();
+ if (this.union == null) { return null; }
+ return this.union.getResultAndReset();
}
void reset() {
- union = null;
+ this.union = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/UnionDoublesSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/UnionDoublesSketchUDAF.java
index d746a54..911b6e8 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/UnionDoublesSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/UnionDoublesSketchUDAF.java
@@ -58,8 +58,8 @@
final DoublesUnionState state = (DoublesUnionState) buf;
if (!state.isInitialized()) {
int k = 0;
- if (kObjectInspector != null) {
- k = PrimitiveObjectInspectorUtils.getInt(data[1], kObjectInspector);
+ if (this.kObjectInspector != null) {
+ k = PrimitiveObjectInspectorUtils.getInt(data[1], this.kObjectInspector);
}
state.init(k);
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java
index 3c93a5c..ae53ef7 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java
@@ -68,8 +68,8 @@
final ItemsUnionState<T> state = (ItemsUnionState<T>) buf;
if (!state.isInitialized()) {
int k = 0;
- if (kObjectInspector != null) {
- k = PrimitiveObjectInspectorUtils.getInt(data[1], kObjectInspector);
+ if (this.kObjectInspector != null) {
+ k = PrimitiveObjectInspectorUtils.getInt(data[1], this.kObjectInspector);
}
state.init(k);
}
diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/Util.java b/src/main/java/org/apache/datasketches/hive/quantiles/Util.java
index 2aea43c..aea22b5 100644
--- a/src/main/java/org/apache/datasketches/hive/quantiles/Util.java
+++ b/src/main/java/org/apache/datasketches/hive/quantiles/Util.java
@@ -33,7 +33,7 @@
}
static List<Double> primitivesToList(final double[] array) {
- final List<Double> result = new ArrayList<Double>(array.length);
+ final List<Double> result = new ArrayList<>(array.length);
for (double item: array) { result.add(item); }
return result;
}
diff --git a/src/main/java/org/apache/datasketches/hive/theta/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/theta/DataToSketchUDAF.java
index 581f5a6..1b579d3 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/DataToSketchUDAF.java
@@ -41,6 +41,12 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+/**
+ * <p><b>Note:</b> Strings supplied as raw data values are encoded as UTF-16 VARCHAR values
+ * before being submitted to the sketch. If a different encoding is required for
+ * cross-platform compatibility, it is recommended that these values be encoded beforehand,
+ * then submitted typed as BINARY (byte[]).</p>
+ */
@Description(
name = "dataToSketch",
value = "_FUNC_(expr, size, prob, seed) - "
@@ -141,24 +147,24 @@
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
- inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
+ this.inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- nominalEntriesObjectInspector = (PrimitiveObjectInspector) parameters[1];
+ this.nominalEntriesObjectInspector = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
- samplingProbabilityObjectInspector = (PrimitiveObjectInspector) parameters[2];
+ this.samplingProbabilityObjectInspector = (PrimitiveObjectInspector) parameters[2];
}
if (parameters.length > 3) {
- seedObjectInspector = (PrimitiveObjectInspector) parameters[3];
+ this.seedObjectInspector = (PrimitiveObjectInspector) parameters[3];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateObjectInspector = (StructObjectInspector) parameters[0];
+ this.intermediateObjectInspector = (StructObjectInspector) parameters[0];
}
- if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
// intermediate results need to include the nominal number of entries and the seed
return ObjectInspectorFactory.getStandardStructObjectInspector(
Arrays.asList(NOMINAL_ENTRIES_FIELD, SEED_FIELD, SKETCH_FIELD),
@@ -190,22 +196,22 @@
if (!state.isInitialized()) {
initializeState(state, parameters);
}
- state.update(parameters[0], inputObjectInspector);
+ state.update(parameters[0], this.inputObjectInspector);
}
private void initializeState(final UnionState state, final Object[] parameters) {
int sketchSize = DEFAULT_NOMINAL_ENTRIES;
- if (nominalEntriesObjectInspector != null) {
- sketchSize = PrimitiveObjectInspectorUtils.getInt(parameters[1], nominalEntriesObjectInspector);
+ if (this.nominalEntriesObjectInspector != null) {
+ sketchSize = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.nominalEntriesObjectInspector);
}
float samplingProbability = UnionState.DEFAULT_SAMPLING_PROBABILITY;
- if (samplingProbabilityObjectInspector != null) {
+ if (this.samplingProbabilityObjectInspector != null) {
samplingProbability = PrimitiveObjectInspectorUtils.getFloat(parameters[2],
- samplingProbabilityObjectInspector);
+ this.samplingProbabilityObjectInspector);
}
long seed = DEFAULT_UPDATE_SEED;
- if (seedObjectInspector != null) {
- seed = PrimitiveObjectInspectorUtils.getLong(parameters[3], seedObjectInspector);
+ if (this.seedObjectInspector != null) {
+ seed = PrimitiveObjectInspectorUtils.getLong(parameters[3], this.seedObjectInspector);
}
state.init(sketchSize, samplingProbability, seed);
}
diff --git a/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java
index 2b5b2d0..8eb4755 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/ExcludeSketchUDF.java
@@ -61,8 +61,7 @@
}
final AnotB anotb = SetOperation.builder().setSeed(hashSeed).buildANotB();
- anotb.update(firstSketch, secondSketch);
- final byte[] excludeSketchBytes = anotb.getResult().toByteArray();
+ final byte[] excludeSketchBytes = anotb.aNotB(firstSketch, secondSketch).toByteArray();
final BytesWritable result = new BytesWritable();
result.set(excludeSketchBytes, 0, excludeSketchBytes.length);
return result;
diff --git a/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDAF.java
index cda0f75..26b2b01 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDAF.java
@@ -93,16 +93,16 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
- inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ this.inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- seedObjectInspector = (PrimitiveObjectInspector) parameters[1];
+ this.seedObjectInspector = (PrimitiveObjectInspector) parameters[1];
}
} else {
- intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
+ this.intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
}
- if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
// intermediate results need to include the seed
return ObjectInspectorFactory.getStandardStructObjectInspector(
Arrays.asList(SEED_FIELD, SKETCH_FIELD),
@@ -125,12 +125,12 @@
final IntersectionState state = (IntersectionState) buf;
if (!state.isInitialized()) {
long seed = DEFAULT_UPDATE_SEED;
- if (seedObjectInspector != null) {
- seed = PrimitiveObjectInspectorUtils.getLong(data[1], seedObjectInspector);
+ if (this.seedObjectInspector != null) {
+ seed = PrimitiveObjectInspectorUtils.getLong(data[1], this.seedObjectInspector);
}
state.init(seed);
}
- final byte[] serializedSketch = (byte[]) inputObjectInspector.getPrimitiveJavaObject(data[0]);
+ final byte[] serializedSketch = (byte[]) this.inputObjectInspector.getPrimitiveJavaObject(data[0]);
if (serializedSketch == null) { return; }
state.update(Memory.wrap(serializedSketch));
}
@@ -154,14 +154,14 @@
if (data == null) { return; }
final IntersectionState state = (IntersectionState) buf;
if (!state.isInitialized()) {
- final long seed = ((LongWritable) intermediateObjectInspector.getStructFieldData(
- data, intermediateObjectInspector.getStructFieldRef(SEED_FIELD))).get();
+ final long seed = ((LongWritable) this.intermediateObjectInspector.getStructFieldData(
+ data, this.intermediateObjectInspector.getStructFieldRef(SEED_FIELD))).get();
state.init(seed);
}
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) intermediateObjectInspector.getStructFieldData(
- data, intermediateObjectInspector.getStructFieldRef(SKETCH_FIELD)));
+ (BytesWritable) this.intermediateObjectInspector.getStructFieldData(
+ data, this.intermediateObjectInspector.getStructFieldRef(SKETCH_FIELD)));
state.update(serializedSketch);
}
@@ -192,29 +192,29 @@
private Intersection intersection_;
boolean isInitialized() {
- return intersection_ != null;
+ return this.intersection_ != null;
}
void init(final long seed) {
- seed_ = seed;
- intersection_ = SetOperation.builder().setSeed(seed).buildIntersection();
+ this.seed_ = seed;
+ this.intersection_ = SetOperation.builder().setSeed(seed).buildIntersection();
}
long getSeed() {
- return seed_;
+ return this.seed_;
}
void update(final Memory serializedSketch) {
- intersection_.update(Sketches.wrapSketch(serializedSketch, seed_));
+ this.intersection_.intersect(Sketches.wrapSketch(serializedSketch, this.seed_));
}
Sketch getResult() {
- if (intersection_ == null) { return null; }
- return intersection_.getResult();
+ if (this.intersection_ == null) { return null; }
+ return this.intersection_.getResult();
}
void reset() {
- intersection_ = null;
+ this.intersection_ = null;
}
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java
index 52ba553..6c4c234 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/IntersectSketchUDF.java
@@ -59,9 +59,7 @@
}
final Intersection intersect = SetOperation.builder().setSeed(hashSeed).buildIntersection();
- intersect.update(firstSketch);
- intersect.update(secondSketch);
- return new BytesWritable(intersect.getResult().toByteArray());
+ return new BytesWritable(intersect.intersect(firstSketch, secondSketch).toByteArray());
}
/**
diff --git a/src/main/java/org/apache/datasketches/hive/theta/UnionEvaluator.java b/src/main/java/org/apache/datasketches/hive/theta/UnionEvaluator.java
index ac86461..c09141e 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/UnionEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/UnionEvaluator.java
@@ -90,16 +90,16 @@
initializeState(state, partial);
}
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) intermediateObjectInspector.getStructFieldData(
- partial, intermediateObjectInspector.getStructFieldRef(SKETCH_FIELD)));
+ (BytesWritable) this.intermediateObjectInspector.getStructFieldData(
+ partial, this.intermediateObjectInspector.getStructFieldRef(SKETCH_FIELD)));
state.update(serializedSketch);
}
private void initializeState(final UnionState state, final Object partial) {
- final int nominalEntries = ((IntWritable) intermediateObjectInspector.getStructFieldData(
- partial, intermediateObjectInspector.getStructFieldRef(NOMINAL_ENTRIES_FIELD))).get();
- final long seed = ((LongWritable) intermediateObjectInspector.getStructFieldData(
- partial, intermediateObjectInspector.getStructFieldRef(SEED_FIELD))).get();
+ final int nominalEntries = ((IntWritable) this.intermediateObjectInspector.getStructFieldData(
+ partial, this.intermediateObjectInspector.getStructFieldRef(NOMINAL_ENTRIES_FIELD))).get();
+ final long seed = ((LongWritable) this.intermediateObjectInspector.getStructFieldData(
+ partial, this.intermediateObjectInspector.getStructFieldRef(SEED_FIELD))).get();
state.init(nominalEntries, seed);
}
diff --git a/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDAF.java
index 74b7ec0..893d857 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDAF.java
@@ -117,16 +117,16 @@
super.init(mode, parameters);
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
+ this.inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
- nominalEntriesObjectInspector = (PrimitiveObjectInspector) parameters[1];
+ this.nominalEntriesObjectInspector = (PrimitiveObjectInspector) parameters[1];
}
if (parameters.length > 2) {
- seedObjectInspector = (PrimitiveObjectInspector) parameters[2];
+ this.seedObjectInspector = (PrimitiveObjectInspector) parameters[2];
}
} else {
// mode = partial2 || final
- intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
+ this.intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
}
if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
@@ -160,19 +160,19 @@
if (!state.isInitialized()) {
initializeState(state, parameters);
}
- final byte[] serializedSketch = (byte[]) inputObjectInspector.getPrimitiveJavaObject(parameters[0]);
+ final byte[] serializedSketch = (byte[]) this.inputObjectInspector.getPrimitiveJavaObject(parameters[0]);
if (serializedSketch == null) { return; }
state.update(Memory.wrap(serializedSketch));
}
private void initializeState(final UnionState state, final Object[] parameters) {
int nominalEntries = DEFAULT_NOMINAL_ENTRIES;
- if (nominalEntriesObjectInspector != null) {
- nominalEntries = PrimitiveObjectInspectorUtils.getInt(parameters[1], nominalEntriesObjectInspector);
+ if (this.nominalEntriesObjectInspector != null) {
+ nominalEntries = PrimitiveObjectInspectorUtils.getInt(parameters[1], this.nominalEntriesObjectInspector);
}
long seed = DEFAULT_UPDATE_SEED;
- if (seedObjectInspector != null) {
- seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], seedObjectInspector);
+ if (this.seedObjectInspector != null) {
+ seed = PrimitiveObjectInspectorUtils.getLong(parameters[2], this.seedObjectInspector);
}
state.init(nominalEntries, seed);
}
diff --git a/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java b/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java
index 2076ca6..41256d7 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/UnionSketchUDF.java
@@ -54,12 +54,12 @@
final Union union = SetOperation.builder().setSeed(seed).setNominalEntries(sketchSize).buildUnion();
- if ((firstSketch != null) && (firstSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES)) {
- union.update(BytesWritableHelper.wrapAsMemory(firstSketch));
+ if (firstSketch != null && firstSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES) {
+ union.union(BytesWritableHelper.wrapAsMemory(firstSketch));
}
- if ((secondSketch != null) && (secondSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES)) {
- union.update(BytesWritableHelper.wrapAsMemory(secondSketch));
+ if (secondSketch != null && secondSketch.getLength() >= EMPTY_SKETCH_SIZE_BYTES) {
+ union.union(BytesWritableHelper.wrapAsMemory(secondSketch));
}
return new BytesWritable(union.getResult().toByteArray());
diff --git a/src/main/java/org/apache/datasketches/hive/theta/UnionState.java b/src/main/java/org/apache/datasketches/hive/theta/UnionState.java
index 177da22..2ec365b 100644
--- a/src/main/java/org/apache/datasketches/hive/theta/UnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/theta/UnionState.java
@@ -36,7 +36,7 @@
private Union union_;
public boolean isInitialized() {
- return union_ != null;
+ return this.union_ != null;
}
// sampling probability is not relevant for merging
@@ -45,52 +45,52 @@
}
public void init(final int nominalEntries, final float samplingProbability, final long seed) {
- nominalEntries_ = nominalEntries;
- seed_ = seed;
- union_ = SetOperation.builder().setNominalEntries(nominalEntries).setP(samplingProbability)
+ this.nominalEntries_ = nominalEntries;
+ this.seed_ = seed;
+ this.union_ = SetOperation.builder().setNominalEntries(nominalEntries).setP(samplingProbability)
.setSeed(seed).buildUnion();
}
public int getNominalEntries() {
- return nominalEntries_;
+ return this.nominalEntries_;
}
public long getSeed() {
- return seed_;
+ return this.seed_;
}
public void update(final Memory mem) {
- union_.update(mem);
+ this.union_.union(mem);
}
public void update(final Object value, final PrimitiveObjectInspector objectInspector) {
switch (objectInspector.getPrimitiveCategory()) {
case BINARY:
- union_.update(PrimitiveObjectInspectorUtils.getBinary(value, objectInspector).copyBytes());
+ this.union_.update(PrimitiveObjectInspectorUtils.getBinary(value, objectInspector).copyBytes());
return;
case BYTE:
- union_.update(PrimitiveObjectInspectorUtils.getByte(value, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getByte(value, objectInspector));
return;
case DOUBLE:
- union_.update(PrimitiveObjectInspectorUtils.getDouble(value, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getDouble(value, objectInspector));
return;
case FLOAT:
- union_.update(PrimitiveObjectInspectorUtils.getFloat(value, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getFloat(value, objectInspector));
return;
case INT:
- union_.update(PrimitiveObjectInspectorUtils.getInt(value, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getInt(value, objectInspector));
return;
case LONG:
- union_.update(PrimitiveObjectInspectorUtils.getLong(value, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getLong(value, objectInspector));
return;
case STRING:
- union_.update(PrimitiveObjectInspectorUtils.getString(value, objectInspector));
+ this.union_.update(PrimitiveObjectInspectorUtils.getString(value, objectInspector));
return;
case CHAR:
- union_.update(PrimitiveObjectInspectorUtils.getHiveChar(value, objectInspector).getValue());
+ this.union_.update(PrimitiveObjectInspectorUtils.getHiveChar(value, objectInspector).getValue());
return;
case VARCHAR:
- union_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(value, objectInspector).getValue());
+ this.union_.update(PrimitiveObjectInspectorUtils.getHiveVarchar(value, objectInspector).getValue());
return;
default:
throw new IllegalArgumentException(
@@ -101,12 +101,12 @@
}
public Sketch getResult() {
- if (union_ == null) { return null; }
- return union_.getResult();
+ if (this.union_ == null) { return null; }
+ return this.union_.getResult();
}
public void reset() {
- union_ = null;
+ this.union_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchEvaluator.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchEvaluator.java
index 6f00015..b27b4ec 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchEvaluator.java
@@ -23,8 +23,8 @@
import org.apache.datasketches.hive.common.BytesWritableHelper;
import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -64,16 +64,16 @@
initializeState(state, data);
}
final Memory serializedSketch = BytesWritableHelper.wrapAsMemory(
- (BytesWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SKETCH_FIELD)));
+ (BytesWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SKETCH_FIELD)));
state.update(ArrayOfDoublesSketches.wrapSketch(serializedSketch));
}
private void initializeState(final ArrayOfDoublesUnionState state, final Object data) {
- final int nominalNumEntries = ((IntWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(NOMINAL_NUM_ENTRIES_FIELD))).get();
- final int numValues = ((IntWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(NUM_VALUES_FIELD))).get();
+ final int nominalNumEntries = ((IntWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(NOMINAL_NUM_ENTRIES_FIELD))).get();
+ final int numValues = ((IntWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(NUM_VALUES_FIELD))).get();
state.init(nominalNumEntries, numValues);
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchState.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchState.java
index bd1930f..0286ef8 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchState.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchState.java
@@ -19,9 +19,9 @@
package org.apache.datasketches.hive.tuple;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
@@ -30,12 +30,12 @@
private ArrayOfDoublesUpdatableSketch sketch_;
boolean isInitialized() {
- return sketch_ != null;
+ return this.sketch_ != null;
}
void init(final int nominalNumEntries, final float samplingProbability, final int numValues) {
super.init(nominalNumEntries, numValues);
- sketch_ = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalNumEntries)
+ this.sketch_ = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalNumEntries)
.setSamplingProbability(samplingProbability).setNumberOfValues(numValues).build();
}
@@ -47,25 +47,25 @@
}
switch (keyInspector.getPrimitiveCategory()) {
case BINARY:
- sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data[0], keyInspector).copyBytes(), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data[0], keyInspector).copyBytes(), values);
return;
case BYTE:
- sketch_.update(PrimitiveObjectInspectorUtils.getByte(data[0], keyInspector), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getByte(data[0], keyInspector), values);
return;
case DOUBLE:
- sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data[0], keyInspector), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data[0], keyInspector), values);
return;
case FLOAT:
- sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data[0], keyInspector), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data[0], keyInspector), values);
return;
case INT:
- sketch_.update(PrimitiveObjectInspectorUtils.getInt(data[0], keyInspector), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getInt(data[0], keyInspector), values);
return;
case LONG:
- sketch_.update(PrimitiveObjectInspectorUtils.getLong(data[0], keyInspector), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getLong(data[0], keyInspector), values);
return;
case STRING:
- sketch_.update(PrimitiveObjectInspectorUtils.getString(data[0], keyInspector), values);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getString(data[0], keyInspector), values);
return;
default:
throw new IllegalArgumentException(
@@ -76,16 +76,16 @@
@Override
ArrayOfDoublesSketch getResult() {
- if (sketch_ == null) { return null; }
+ if (this.sketch_ == null) { return null; }
// assumes that it is called once at the end of processing
// since trimming to nominal number of entries is expensive
- sketch_.trim();
- return sketch_.compact();
+ this.sketch_.trim();
+ return this.sketch_.compact();
}
@Override
void reset() {
- sketch_ = null;
+ this.sketch_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchStats.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchStats.java
index 06890fd..00e1014 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchStats.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchStats.java
@@ -20,8 +20,8 @@
package org.apache.datasketches.hive.tuple;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
class ArrayOfDoublesSketchStats {
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.java
index 1dc805f..ccfc731 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.java
@@ -23,8 +23,8 @@
import java.util.List;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDF.java
index 89da31a..e988c07 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDF.java
@@ -23,9 +23,9 @@
import java.util.List;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDF.java
index c5daae6..7f57752 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDF.java
@@ -24,8 +24,8 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.java
index 9b42b24..914fd2b 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.java
@@ -20,8 +20,8 @@
package org.apache.datasketches.hive.tuple;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDF.java
index cfee6b9..f556640 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDF.java
@@ -23,9 +23,9 @@
import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.DoublesSketchBuilder;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java
index ee9bda3..0f2ee8d 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTF.java
@@ -24,9 +24,9 @@
import java.util.List;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -53,10 +53,10 @@
throw new UDFArgumentTypeException(0, "Primitive argument expected, but "
+ inspectors[0].getCategory().name() + " was recieved");
}
- inputObjectInspector = (PrimitiveObjectInspector) inspectors[0];
- if (inputObjectInspector.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
+ this.inputObjectInspector = (PrimitiveObjectInspector) inspectors[0];
+ if (this.inputObjectInspector.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
throw new UDFArgumentTypeException(0, "Binary value expected as the first argument, but "
- + inputObjectInspector.getPrimitiveCategory().name() + " was recieved");
+ + this.inputObjectInspector.getPrimitiveCategory().name() + " was recieved");
}
return ObjectInspectorFactory.getStandardStructObjectInspector(
@@ -73,7 +73,7 @@
public void process(final Object[] data) throws HiveException {
if (data == null || data[0] == null) { return; }
final BytesWritable serializedSketch =
- (BytesWritable) inputObjectInspector.getPrimitiveWritableObject(data[0]);
+ (BytesWritable) this.inputObjectInspector.getPrimitiveWritableObject(data[0]);
final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(
BytesWritableHelper.wrapAsMemory(serializedSketch));
final ArrayOfDoublesSketchIterator it = sketch.iterator();
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDF.java
index f2ba17f..8612917 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDF.java
@@ -24,8 +24,8 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDF.java
index 4ed64dc..aafee45 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDF.java
@@ -25,8 +25,8 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.commons.math3.stat.inference.TTest;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;
@@ -48,7 +48,7 @@
* @return list of p-values
*/
public List<Double> evaluate(final BytesWritable serializedSketchA, final BytesWritable serializedSketchB) {
- if ((serializedSketchA == null) || (serializedSketchB == null)) { return null; }
+ if (serializedSketchA == null || serializedSketchB == null) { return null; }
final ArrayOfDoublesSketch sketchA =
ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serializedSketchA));
final ArrayOfDoublesSketch sketchB =
@@ -59,7 +59,7 @@
}
// If the sketches contain fewer than 2 values, the p-value can't be calculated
- if ((sketchA.getRetainedEntries() < 2) || (sketchB.getRetainedEntries() < 2)) {
+ if (sketchA.getRetainedEntries() < 2 || sketchB.getRetainedEntries() < 2) {
return null;
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesState.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesState.java
index 59743a0..26a0675 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesState.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesState.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.hive.tuple;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer;
abstract class ArrayOfDoublesState extends AbstractAggregationBuffer {
@@ -28,16 +28,16 @@
private int numValues_;
void init(final int numNominalEntries, final int numValues) {
- nominalNumEntries_ = numNominalEntries;
- numValues_ = numValues;
+ this.nominalNumEntries_ = numNominalEntries;
+ this.numValues_ = numValues;
}
int getNominalNumEntries() {
- return nominalNumEntries_;
+ return this.nominalNumEntries_;
}
int getNumValues() {
- return numValues_;
+ return this.numValues_;
}
abstract ArrayOfDoublesSketch getResult();
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesUnionState.java b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesUnionState.java
index 2610f7f..69e1a28 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesUnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesUnionState.java
@@ -19,38 +19,38 @@
package org.apache.datasketches.hive.tuple;
-import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion;
class ArrayOfDoublesUnionState extends ArrayOfDoublesState {
private ArrayOfDoublesUnion union_;
boolean isInitialized() {
- return union_ != null;
+ return this.union_ != null;
}
@Override
void init(final int nominalNumEntries, final int numValues) {
super.init(nominalNumEntries, numValues);
- union_ = new ArrayOfDoublesSetOperationBuilder()
+ this.union_ = new ArrayOfDoublesSetOperationBuilder()
.setNominalEntries(nominalNumEntries).setNumberOfValues(numValues).buildUnion();
}
void update(final ArrayOfDoublesSketch sketch) {
- union_.update(sketch);
+ this.union_.union(sketch);
}
@Override
ArrayOfDoublesSketch getResult() {
- if (union_ == null) { return null; }
- return union_.getResult();
+ if (this.union_ == null) { return null; }
+ return this.union_.getResult();
}
@Override
void reset() {
- union_ = null;
+ this.union_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAF.java
index dc6d3f2..b7edded 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAF.java
@@ -106,31 +106,31 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
super.init(mode, parameters);
- mode_ = mode;
+ this.mode_ = mode;
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
// input is original data
- keyInspector_ = (PrimitiveObjectInspector) parameters[0];
- numValues_ = 0;
- while ((numValues_ + 1) < parameters.length) {
- if (((PrimitiveObjectInspector) parameters[numValues_ + 1]).getPrimitiveCategory()
+ this.keyInspector_ = (PrimitiveObjectInspector) parameters[0];
+ this.numValues_ = 0;
+ while ((this.numValues_ + 1) < parameters.length) {
+ if (((PrimitiveObjectInspector) parameters[this.numValues_ + 1]).getPrimitiveCategory()
!= PrimitiveCategory.DOUBLE) {
break;
}
- numValues_++;
+ this.numValues_++;
}
- valuesInspectors_ = new PrimitiveObjectInspector[numValues_];
- for (int i = 0; i < numValues_; i++) {
- valuesInspectors_[i] = (PrimitiveObjectInspector) parameters[i + 1];
+ this.valuesInspectors_ = new PrimitiveObjectInspector[this.numValues_];
+ for (int i = 0; i < this.numValues_; i++) {
+ this.valuesInspectors_[i] = (PrimitiveObjectInspector) parameters[i + 1];
}
- if (parameters.length > (numValues_ + 1)) {
- nominalNumEntriesInspector_ = (PrimitiveObjectInspector) parameters[numValues_ + 1];
+ if (parameters.length > (this.numValues_ + 1)) {
+ this.nominalNumEntriesInspector_ = (PrimitiveObjectInspector) parameters[this.numValues_ + 1];
}
- if (parameters.length > (numValues_ + 2)) {
- samplingProbabilityInspector_ = (PrimitiveObjectInspector) parameters[numValues_ + 2];
+ if (parameters.length > (this.numValues_ + 2)) {
+ this.samplingProbabilityInspector_ = (PrimitiveObjectInspector) parameters[this.numValues_ + 2];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateInspector_ = (StructObjectInspector) parameters[0];
+ this.intermediateInspector_ = (StructObjectInspector) parameters[0];
}
if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
@@ -156,27 +156,27 @@
if (!state.isInitialized()) {
initializeState(state, data);
}
- state.update(data, keyInspector_, valuesInspectors_);
+ state.update(data, this.keyInspector_, this.valuesInspectors_);
}
private void initializeState(final ArrayOfDoublesSketchState state, final Object[] data) {
int nominalNumEntries = DEFAULT_NOMINAL_ENTRIES;
- if (nominalNumEntriesInspector_ != null) {
+ if (this.nominalNumEntriesInspector_ != null) {
nominalNumEntries =
- PrimitiveObjectInspectorUtils.getInt(data[numValues_ + 1], nominalNumEntriesInspector_);
+ PrimitiveObjectInspectorUtils.getInt(data[this.numValues_ + 1], this.nominalNumEntriesInspector_);
}
float samplingProbability = DEFAULT_SAMPLING_PROBABILITY;
- if (samplingProbabilityInspector_ != null) {
- samplingProbability = PrimitiveObjectInspectorUtils.getFloat(data[numValues_ + 2],
- samplingProbabilityInspector_);
+ if (this.samplingProbabilityInspector_ != null) {
+ samplingProbability = PrimitiveObjectInspectorUtils.getFloat(data[this.numValues_ + 2],
+ this.samplingProbabilityInspector_);
}
- state.init(nominalNumEntries, samplingProbability, numValues_);
+ state.init(nominalNumEntries, samplingProbability, this.numValues_);
}
@SuppressWarnings("deprecation")
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- if ((mode_ == Mode.PARTIAL1) || (mode_ == Mode.COMPLETE)) {
+ if ((this.mode_ == Mode.PARTIAL1) || (this.mode_ == Mode.COMPLETE)) {
return new ArrayOfDoublesSketchState();
}
return new ArrayOfDoublesUnionState();
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/DataToDoubleSummaryWithModeSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/tuple/DataToDoubleSummaryWithModeSketchUDAF.java
index d668e81..16d6777 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/DataToDoubleSummaryWithModeSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/DataToDoubleSummaryWithModeSketchUDAF.java
@@ -100,7 +100,7 @@
private DoubleSummary.Mode summaryMode_;
public DataToDoubleSummaryWithModeSketchEvaluator() {
- summaryMode_ = DoubleSummary.Mode.Sum;
+ this.summaryMode_ = DoubleSummary.Mode.Sum;
}
@Override
@@ -115,7 +115,7 @@
if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
if (inspectors.length > 4) {
- summaryModeInspector_ = (PrimitiveObjectInspector) inspectors[4];
+ this.summaryModeInspector_ = (PrimitiveObjectInspector) inspectors[4];
}
}
if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
@@ -134,12 +134,12 @@
@Override
protected SummaryFactory<DoubleSummary> getSummaryFactory(final Object[] data) {
- if (summaryModeInspector_ != null) {
- summaryMode_ = DoubleSummary.Mode.valueOf(
- PrimitiveObjectInspectorUtils.getString(data[4], summaryModeInspector_)
+ if (this.summaryModeInspector_ != null) {
+ this.summaryMode_ = DoubleSummary.Mode.valueOf(
+ PrimitiveObjectInspectorUtils.getString(data[4], this.summaryModeInspector_)
);
}
- return new DoubleSummaryFactory(summaryMode_);
+ return new DoubleSummaryFactory(this.summaryMode_);
}
@Override
@@ -158,16 +158,16 @@
final byte[] bytes = intermediate.toByteArray();
return Arrays.asList(
new IntWritable(state.getNominalNumEntries()),
- new Text(summaryMode_.toString()),
+ new Text(this.summaryMode_.toString()),
new BytesWritable(bytes)
);
}
@Override
protected SummarySetOperations<DoubleSummary> getSummarySetOperationsForMerge(final Object data) {
- summaryMode_ = DoubleSummary.Mode.valueOf(((Text) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SUMMARY_MODE_FIELD))).toString());
- return new DoubleSummarySetOperations(summaryMode_);
+ this.summaryMode_ = DoubleSummary.Mode.valueOf(((Text) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SUMMARY_MODE_FIELD))).toString());
+ return new DoubleSummarySetOperations(this.summaryMode_);
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/DataToSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/tuple/DataToSketchUDAF.java
index b236329..97ea58e 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/DataToSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/DataToSketchUDAF.java
@@ -39,6 +39,12 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+/**
+ * <p><b>Note</b> Strings as raw data values are encoded as a UTF-16 VARCHAR
+ * prior to being submitted to the sketch. If the user requires a different
+ * encoding for cross-platform compatibility, it is recommended that these values be encoded prior
+ * to being submitted and then typed as a BINARY byte[].</p>
+ */
@SuppressWarnings("javadoc")
public abstract class DataToSketchUDAF extends AbstractGenericUDAFResolver {
@@ -63,8 +69,8 @@
if (inspectors.length > 3) {
ObjectInspectorValidator.validateCategoryPrimitive(inspectors[3], 3);
final PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) inspectors[3];
- if ((primitiveInspector.getPrimitiveCategory() != PrimitiveCategory.FLOAT)
- && (primitiveInspector.getPrimitiveCategory() != PrimitiveCategory.DOUBLE)) {
+ if (primitiveInspector.getPrimitiveCategory() != PrimitiveCategory.FLOAT
+ && primitiveInspector.getPrimitiveCategory() != PrimitiveCategory.DOUBLE) {
throw new UDFArgumentTypeException(3, "float or double value expected as parameter 4 but "
+ primitiveInspector.getPrimitiveCategory().name() + " was received");
}
@@ -106,23 +112,23 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] inspectors) throws HiveException {
super.init(mode, inspectors);
- mode_ = mode;
- if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
+ this.mode_ = mode;
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
- keyInspector_ = (PrimitiveObjectInspector) inspectors[0];
- valueInspector_ = (PrimitiveObjectInspector) inspectors[1];
+ this.keyInspector_ = (PrimitiveObjectInspector) inspectors[0];
+ this.valueInspector_ = (PrimitiveObjectInspector) inspectors[1];
if (inspectors.length > 2) {
- nominalNumEntriesInspector_ = (PrimitiveObjectInspector) inspectors[2];
+ this.nominalNumEntriesInspector_ = (PrimitiveObjectInspector) inspectors[2];
}
if (inspectors.length > 3) {
- samplingProbabilityInspector_ = (PrimitiveObjectInspector) inspectors[3];
+ this.samplingProbabilityInspector_ = (PrimitiveObjectInspector) inspectors[3];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateInspector_ = (StructObjectInspector) inspectors[0];
+ this.intermediateInspector_ = (StructObjectInspector) inspectors[0];
}
- if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
// intermediate results need to include the nominal number of entries
return ObjectInspectorFactory.getStandardStructObjectInspector(
Arrays.asList(NOMINAL_NUM_ENTRIES_FIELD, SKETCH_FIELD),
@@ -145,18 +151,18 @@
if (!state.isInitialized()) {
initializeState(state, data);
}
- state.update(data[0], keyInspector_, extractValue(data[1], valueInspector_));
+ state.update(data[0], this.keyInspector_, extractValue(data[1], this.valueInspector_));
}
private void initializeState(final SketchState<U, S> state, final Object[] data) {
int nominalNumEntries = DEFAULT_NOMINAL_ENTRIES;
- if (nominalNumEntriesInspector_ != null) {
- nominalNumEntries = PrimitiveObjectInspectorUtils.getInt(data[2], nominalNumEntriesInspector_);
+ if (this.nominalNumEntriesInspector_ != null) {
+ nominalNumEntries = PrimitiveObjectInspectorUtils.getInt(data[2], this.nominalNumEntriesInspector_);
}
float samplingProbability = DEFAULT_SAMPLING_PROBABILITY;
- if (samplingProbabilityInspector_ != null) {
+ if (this.samplingProbabilityInspector_ != null) {
samplingProbability = PrimitiveObjectInspectorUtils.getFloat(data[3],
- samplingProbabilityInspector_);
+ this.samplingProbabilityInspector_);
}
state.init(nominalNumEntries, samplingProbability, getSummaryFactory(data));
}
@@ -164,7 +170,7 @@
@SuppressWarnings("deprecation")
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- if ((mode_ == Mode.PARTIAL1) || (mode_ == Mode.COMPLETE)) {
+ if (this.mode_ == Mode.PARTIAL1 || this.mode_ == Mode.COMPLETE) {
return new SketchState<U, S>();
}
return new UnionState<S>();
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/SketchEvaluator.java b/src/main/java/org/apache/datasketches/hive/tuple/SketchEvaluator.java
index 03449c2..3af36d5 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/SketchEvaluator.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/SketchEvaluator.java
@@ -91,16 +91,16 @@
initializeState(state, data);
}
final BytesWritable serializedSketch =
- (BytesWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
+ (BytesWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SKETCH_FIELD));
state.update(Sketches.heapifySketch(
BytesWritableHelper.wrapAsMemory(serializedSketch),
getSummaryDeserializer()));
}
protected void initializeState(final UnionState<S> state, final Object data) {
- final int nominalNumEntries = ((IntWritable) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(NOMINAL_NUM_ENTRIES_FIELD))).get();
+ final int nominalNumEntries = ((IntWritable) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(NOMINAL_NUM_ENTRIES_FIELD))).get();
state.init(nominalNumEntries, getSummarySetOperationsForMerge(data));
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/SketchState.java b/src/main/java/org/apache/datasketches/hive/tuple/SketchState.java
index 14c17f8..ccc7093 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/SketchState.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/SketchState.java
@@ -32,38 +32,38 @@
private UpdatableSketch<U, S> sketch_;
boolean isInitialized() {
- return sketch_ != null;
+ return this.sketch_ != null;
}
void init(final int nominalNumEntries, final float samplingProbability,
final SummaryFactory<S> summaryFactory) {
super.init(nominalNumEntries);
- sketch_ = new UpdatableSketchBuilder<U, S>(summaryFactory).setNominalEntries(nominalNumEntries)
+ this.sketch_ = new UpdatableSketchBuilder<>(summaryFactory).setNominalEntries(nominalNumEntries)
.setSamplingProbability(samplingProbability).build();
}
void update(final Object data, final PrimitiveObjectInspector keyObjectInspector, final U value) {
switch (keyObjectInspector.getPrimitiveCategory()) {
case BINARY:
- sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data, keyObjectInspector).copyBytes(), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getBinary(data, keyObjectInspector).copyBytes(), value);
return;
case BYTE:
- sketch_.update(PrimitiveObjectInspectorUtils.getByte(data, keyObjectInspector), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getByte(data, keyObjectInspector), value);
return;
case DOUBLE:
- sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data, keyObjectInspector), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getDouble(data, keyObjectInspector), value);
return;
case FLOAT:
- sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data, keyObjectInspector), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getFloat(data, keyObjectInspector), value);
return;
case INT:
- sketch_.update(PrimitiveObjectInspectorUtils.getInt(data, keyObjectInspector), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getInt(data, keyObjectInspector), value);
return;
case LONG:
- sketch_.update(PrimitiveObjectInspectorUtils.getLong(data, keyObjectInspector), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getLong(data, keyObjectInspector), value);
return;
case STRING:
- sketch_.update(PrimitiveObjectInspectorUtils.getString(data, keyObjectInspector), value);
+ this.sketch_.update(PrimitiveObjectInspectorUtils.getString(data, keyObjectInspector), value);
return;
default:
throw new IllegalArgumentException(
@@ -74,16 +74,16 @@
@Override
Sketch<S> getResult() {
- if (sketch_ == null) { return null; }
+ if (this.sketch_ == null) { return null; }
// assumes that it is called once at the end of processing
// since trimming to nominal number of entries is expensive
- sketch_.trim();
- return sketch_.compact();
+ this.sketch_.trim();
+ return this.sketch_.compact();
}
@Override
void reset() {
- sketch_ = null;
+ this.sketch_ = null;
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/State.java b/src/main/java/org/apache/datasketches/hive/tuple/State.java
index e2d774a..93b6c9e 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/State.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/State.java
@@ -28,11 +28,11 @@
private int nominalNumEntries_;
void init(final int nominalNumEntries) {
- nominalNumEntries_ = nominalNumEntries;
+ this.nominalNumEntries_ = nominalNumEntries;
}
int getNominalNumEntries() {
- return nominalNumEntries_;
+ return this.nominalNumEntries_;
}
abstract Sketch<S> getResult();
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAF.java
index 324ef06..8f6a75d 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAF.java
@@ -24,7 +24,7 @@
import java.util.Arrays;
import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -91,21 +91,21 @@
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] inspectors) throws HiveException {
super.init(mode, inspectors);
- if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
// input is original data
- sketchInspector_ = (PrimitiveObjectInspector) inspectors[0];
+ this.sketchInspector_ = (PrimitiveObjectInspector) inspectors[0];
if (inspectors.length > 1) {
- nominalNumEntriesInspector_ = (PrimitiveObjectInspector) inspectors[1];
+ this.nominalNumEntriesInspector_ = (PrimitiveObjectInspector) inspectors[1];
}
if (inspectors.length > 2) {
- numValuesInspector_ = (PrimitiveObjectInspector) inspectors[2];
+ this.numValuesInspector_ = (PrimitiveObjectInspector) inspectors[2];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateInspector_ = (StructObjectInspector) inspectors[0];
+ this.intermediateInspector_ = (StructObjectInspector) inspectors[0];
}
- if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
// intermediate results need to include the the nominal number of entries and number of values
return ObjectInspectorFactory.getStandardStructObjectInspector(
Arrays.asList(NOMINAL_NUM_ENTRIES_FIELD, NUM_VALUES_FIELD, SKETCH_FIELD),
@@ -128,19 +128,19 @@
if (!state.isInitialized()) {
initializeState(state, data);
}
- final byte[] serializedSketch = (byte[]) sketchInspector_.getPrimitiveJavaObject(data[0]);
+ final byte[] serializedSketch = (byte[]) this.sketchInspector_.getPrimitiveJavaObject(data[0]);
if (serializedSketch == null) { return; }
state.update(ArrayOfDoublesSketches.wrapSketch(Memory.wrap(serializedSketch)));
}
private void initializeState(final ArrayOfDoublesUnionState state, final Object[] data) {
int nominalNumEntries = DEFAULT_NOMINAL_ENTRIES;
- if (nominalNumEntriesInspector_ != null) {
- nominalNumEntries = PrimitiveObjectInspectorUtils.getInt(data[1], nominalNumEntriesInspector_);
+ if (this.nominalNumEntriesInspector_ != null) {
+ nominalNumEntries = PrimitiveObjectInspectorUtils.getInt(data[1], this.nominalNumEntriesInspector_);
}
int numValues = DEFAULT_NUM_VALUES;
- if (numValuesInspector_ != null) {
- numValues = PrimitiveObjectInspectorUtils.getInt(data[2], numValuesInspector_);
+ if (this.numValuesInspector_ != null) {
+ numValues = PrimitiveObjectInspectorUtils.getInt(data[2], this.numValuesInspector_);
}
state.init(nominalNumEntries, numValues);
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/UnionDoubleSummaryWithModeSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/tuple/UnionDoubleSummaryWithModeSketchUDAF.java
index feb8ff2..ffcbbe0 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/UnionDoubleSummaryWithModeSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/UnionDoubleSummaryWithModeSketchUDAF.java
@@ -88,7 +88,7 @@
private DoubleSummary.Mode summaryMode_;
public UnionDoubleSummaryWithModeSketchEvaluator() {
- summaryMode_ = DoubleSummary.Mode.Sum;
+ this.summaryMode_ = DoubleSummary.Mode.Sum;
}
@Override
@@ -103,7 +103,7 @@
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
// input is original data
if (inspectors.length > 2) {
- summaryModeInspector_ = (PrimitiveObjectInspector) inspectors[2];
+ this.summaryModeInspector_ = (PrimitiveObjectInspector) inspectors[2];
}
}
if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
@@ -127,12 +127,12 @@
@Override
protected SummarySetOperations<DoubleSummary> getSummarySetOperationsForIterate(final Object[] data) {
- if (summaryModeInspector_ != null) {
- summaryMode_ = DoubleSummary.Mode.valueOf(
- PrimitiveObjectInspectorUtils.getString(data[2], summaryModeInspector_)
+ if (this.summaryModeInspector_ != null) {
+ this.summaryMode_ = DoubleSummary.Mode.valueOf(
+ PrimitiveObjectInspectorUtils.getString(data[2], this.summaryModeInspector_)
);
}
- return new DoubleSummarySetOperations(summaryMode_);
+ return new DoubleSummarySetOperations(this.summaryMode_);
}
// need to add summary mode
@@ -146,16 +146,16 @@
final byte[] bytes = intermediate.toByteArray();
return Arrays.asList(
new IntWritable(state.getNominalNumEntries()),
- new Text(summaryMode_.toString()),
+ new Text(this.summaryMode_.toString()),
new BytesWritable(bytes)
);
}
@Override
protected SummarySetOperations<DoubleSummary> getSummarySetOperationsForMerge(final Object data) {
- summaryMode_ = DoubleSummary.Mode.valueOf(((Text) intermediateInspector_.getStructFieldData(
- data, intermediateInspector_.getStructFieldRef(SUMMARY_MODE_FIELD))).toString());
- return new DoubleSummarySetOperations(summaryMode_);
+ this.summaryMode_ = DoubleSummary.Mode.valueOf(((Text) this.intermediateInspector_.getStructFieldData(
+ data, this.intermediateInspector_.getStructFieldRef(SUMMARY_MODE_FIELD))).toString());
+ return new DoubleSummarySetOperations(this.summaryMode_);
}
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/UnionSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/tuple/UnionSketchUDAF.java
index f097ff1..d1c5522 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/UnionSketchUDAF.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/UnionSketchUDAF.java
@@ -91,13 +91,13 @@
super.init(mode, inspectors);
if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
// input is original data
- sketchInspector_ = (PrimitiveObjectInspector) inspectors[0];
+ this.sketchInspector_ = (PrimitiveObjectInspector) inspectors[0];
if (inspectors.length > 1) {
- nominalNumEntriesInspector_ = (PrimitiveObjectInspector) inspectors[1];
+ this.nominalNumEntriesInspector_ = (PrimitiveObjectInspector) inspectors[1];
}
} else {
// input for PARTIAL2 and FINAL is the output from PARTIAL1
- intermediateInspector_ = (StructObjectInspector) inspectors[0];
+ this.intermediateInspector_ = (StructObjectInspector) inspectors[0];
}
if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
@@ -123,15 +123,15 @@
if (!state.isInitialized()) {
initializeState(state, data);
}
- final byte[] serializedSketch = (byte[]) sketchInspector_.getPrimitiveJavaObject(data[0]);
+ final byte[] serializedSketch = (byte[]) this.sketchInspector_.getPrimitiveJavaObject(data[0]);
if (serializedSketch == null) { return; }
state.update(Sketches.heapifySketch(Memory.wrap(serializedSketch), getSummaryDeserializer()));
}
protected void initializeState(final UnionState<S> state, final Object[] data) {
int nominalNumEntries = DEFAULT_NOMINAL_ENTRIES;
- if (nominalNumEntriesInspector_ != null) {
- nominalNumEntries = PrimitiveObjectInspectorUtils.getInt(data[1], nominalNumEntriesInspector_);
+ if (this.nominalNumEntriesInspector_ != null) {
+ nominalNumEntries = PrimitiveObjectInspectorUtils.getInt(data[1], this.nominalNumEntriesInspector_);
}
state.init(nominalNumEntries, getSummarySetOperationsForIterate(data));
}
diff --git a/src/main/java/org/apache/datasketches/hive/tuple/UnionState.java b/src/main/java/org/apache/datasketches/hive/tuple/UnionState.java
index 90b9383..8a56f3a 100644
--- a/src/main/java/org/apache/datasketches/hive/tuple/UnionState.java
+++ b/src/main/java/org/apache/datasketches/hive/tuple/UnionState.java
@@ -29,27 +29,27 @@
private Union<S> union_;
boolean isInitialized() {
- return union_ != null;
+ return this.union_ != null;
}
void init(final int nominalNumEntries, final SummarySetOperations<S> summarySetOps) {
super.init(nominalNumEntries);
- union_ = new Union<S>(nominalNumEntries, summarySetOps);
+ this.union_ = new Union<>(nominalNumEntries, summarySetOps);
}
void update(final Sketch<S> sketch) {
- union_.update(sketch);
+ this.union_.union(sketch);
}
@Override
Sketch<S> getResult() {
- if (union_ == null) { return null; }
- return union_.getResult();
+ if (this.union_ == null) { return null; }
+ return this.union_.getResult();
}
@Override
void reset() {
- union_ = null;
+ this.union_ = null;
}
}
diff --git a/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java b/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java
index a8fd68e..12d4a07 100644
--- a/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java
@@ -106,6 +106,7 @@
func.initialize(inspectors);
}
+ @SuppressWarnings("synthetic-access")
@Test
public void normalCase() throws Exception {
ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector, stringInspector };
@@ -173,7 +174,7 @@
@Override
public void collect(Object object) throws HiveException {
- list.add(object);
+ this.list.add(object);
}
}
diff --git a/src/test/java/org/apache/datasketches/hive/theta/DataToSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/theta/DataToSketchUDAFTest.java
index ed4afe6..3d9c0ca 100644
--- a/src/test/java/org/apache/datasketches/hive/theta/DataToSketchUDAFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/theta/DataToSketchUDAFTest.java
@@ -214,7 +214,7 @@
// check if seed is correct in the result
Union union = SetOperation.builder().setSeed(seed).buildUnion();
// this must fail if the seed is incompatible
- union.update(resultSketch);
+ union.union(resultSketch);
}
}
diff --git a/src/test/java/org/apache/datasketches/hive/theta/ExcludeSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/theta/ExcludeSketchUDFTest.java
index 225f18a..5cfd07f 100644
--- a/src/test/java/org/apache/datasketches/hive/theta/ExcludeSketchUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/theta/ExcludeSketchUDFTest.java
@@ -20,44 +20,38 @@
package org.apache.datasketches.hive.theta;
import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.testng.Assert.fail;
import static org.testng.AssertJUnit.assertEquals;
+import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.hadoop.io.BytesWritable;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Sketches;
import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.hadoop.io.BytesWritable;
+import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class ExcludeSketchUDFTest {
+ @SuppressWarnings("unused")
@Test
public void evaluateNull() {
ExcludeSketchUDF testObject = new ExcludeSketchUDF();
-
- BytesWritable intermResult = testObject.evaluate(null, null);
-
- Memory mem = BytesWritableHelper.wrapAsMemory(intermResult);
-
- Sketch testResult = Sketches.wrapSketch(mem);
-
- assertEquals(0.0, testResult.getEstimate());
+ try {
+ BytesWritable intermResult = testObject.evaluate(null, null);
+ fail();
+ } catch (SketchesArgumentException e) {}
}
+ @SuppressWarnings("unused")
@Test
- public void evaluateEmpty() {
+ public void evaluateEmpty() { //Current impl is more restrictive than it needs to be.
ExcludeSketchUDF testObject = new ExcludeSketchUDF();
-
- BytesWritable intermResult = testObject.evaluate(new BytesWritable(), new BytesWritable());
-
- Memory mem = BytesWritableHelper.wrapAsMemory(intermResult);
-
- Sketch testResult = Sketches.wrapSketch(mem);
-
- assertEquals(0.0, testResult.getEstimate());
+ try {
+ BytesWritable intermResult = testObject.evaluate(new BytesWritable(), new BytesWritable());
+ fail();
+ } catch (SketchesArgumentException e) {}
}
@Test
diff --git a/src/test/java/org/apache/datasketches/hive/theta/IntersectSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/theta/IntersectSketchUDFTest.java
index 9792c90..49ad0db 100644
--- a/src/test/java/org/apache/datasketches/hive/theta/IntersectSketchUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/theta/IntersectSketchUDFTest.java
@@ -21,34 +21,33 @@
import static org.testng.AssertJUnit.assertEquals;
+import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.hadoop.io.BytesWritable;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Sketches;
import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.hadoop.io.BytesWritable;
+import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class IntersectSketchUDFTest {
+ @SuppressWarnings("unused")
@Test
public void evaluateNull() {
IntersectSketchUDF testObject = new IntersectSketchUDF();
- BytesWritable intermResult = testObject.evaluate(null, null);
- Memory mem = BytesWritableHelper.wrapAsMemory(intermResult);
- Sketch testResult = Sketches.wrapSketch(mem);
- assertEquals(0.0, testResult.getEstimate());
+ try {
+ BytesWritable intermResult = testObject.evaluate(null, null);
+ } catch (SketchesArgumentException e) {}
}
+ @SuppressWarnings("unused")
@Test
- public void evaluateEmpty() {
+ public void evaluateEmpty() { //Current impl is more restrictive than it needs to be.
IntersectSketchUDF testObject = new IntersectSketchUDF();
- BytesWritable intermResult = testObject.evaluate(new BytesWritable(), new BytesWritable());
- Memory mem = BytesWritableHelper.wrapAsMemory(intermResult);
- Sketch testResult = Sketches.wrapSketch(mem);
- assertEquals(0.0, testResult.getEstimate());
+ try {
+ BytesWritable intermResult = testObject.evaluate(new BytesWritable(), new BytesWritable());
+ } catch (SketchesArgumentException e) {}
}
@Test
diff --git a/src/test/java/org/apache/datasketches/hive/theta/SampleSketchUDF.java b/src/test/java/org/apache/datasketches/hive/theta/SampleSketchUDF.java
index 07b14d9..23c06cd 100644
--- a/src/test/java/org/apache/datasketches/hive/theta/SampleSketchUDF.java
+++ b/src/test/java/org/apache/datasketches/hive/theta/SampleSketchUDF.java
@@ -20,13 +20,12 @@
package org.apache.datasketches.hive.theta;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.io.BytesWritable;
-
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Union;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
/**
* Hive estimate sketch UDF.
@@ -65,7 +64,7 @@
// The builder will catch errors with improper sketchSize or probability
Union union = SetOperation.builder().setP(probability).setNominalEntries(sketchSize).buildUnion();
- union.update(serializedSketch); //Union can accept Memory object directly
+ union.union(serializedSketch); //Union can accept Memory object directly
Sketch intermediateSketch = union.getResult(false, null); //to CompactSketch(unordered, on-heap)
byte[] resultSketch = intermediateSketch.toByteArray();
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDFTest.java
index 652e8f3..a474097 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimateAndErrorBoundsUDFTest.java
@@ -21,8 +21,8 @@
import java.util.List;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDFTest.java
index 19f4ff3..4fbec88 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToEstimatesUDFTest.java
@@ -21,8 +21,8 @@
import java.util.List;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDFTest.java
index 5997d87..3e5bda8 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToMeansUDFTest.java
@@ -22,8 +22,8 @@
import java.util.List;
import java.util.Random;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDFTest.java
index d7c40a5..254e4a5 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntriesUDFTest.java
@@ -19,13 +19,12 @@
package org.apache.datasketches.hive.tuple;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
@SuppressWarnings("javadoc")
public class ArrayOfDoublesSketchToNumberOfRetainedEntriesUDFTest {
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDFTest.java
index 2b4e45d..fa3b88e 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToQuantilesSketchUDFTest.java
@@ -20,14 +20,13 @@
package org.apache.datasketches.hive.tuple;
import org.apache.datasketches.hive.common.BytesWritableHelper;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
-import org.apache.datasketches.quantiles.DoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
@SuppressWarnings("javadoc")
public class ArrayOfDoublesSketchToQuantilesSketchUDFTest {
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTFTest.java
index a7c3367..4b0727e 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToValuesUDTFTest.java
@@ -23,6 +23,8 @@
import java.util.Arrays;
import java.util.List;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -32,17 +34,14 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
@SuppressWarnings("javadoc")
public class ArrayOfDoublesSketchToValuesUDTFTest {
@@ -89,7 +88,7 @@
func.initialize(inspectors);
}
- @SuppressWarnings({ "deprecation", "unchecked" })
+ @SuppressWarnings({ "deprecation", "unchecked", "synthetic-access" })
@Test
public void normalCase() throws Exception {
ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
@@ -104,8 +103,8 @@
func.process(new Object[] {new BytesWritable(sketch.toByteArray())});
Assert.assertEquals(collector.list.size(), 2);
Assert.assertEquals(((Object[]) collector.list.get(0)).length, 1);
- Assert.assertEquals(((List<Double>) ((Object[]) collector.list.get(0))[0]), Arrays.asList(1.0, 2.0));
- Assert.assertEquals(((List<Double>) ((Object[]) collector.list.get(1))[0]), Arrays.asList(1.0, 2.0));
+ Assert.assertEquals((List<Double>) ((Object[]) collector.list.get(0))[0], Arrays.asList(1.0, 2.0));
+ Assert.assertEquals((List<Double>) ((Object[]) collector.list.get(1))[0], Arrays.asList(1.0, 2.0));
}
private static void checkResultInspector(ObjectInspector resultInspector) {
@@ -125,7 +124,7 @@
@Override
public void collect(Object object) throws HiveException {
- list.add(object);
+ this.list.add(object);
}
}
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDFTest.java
index e019d83..8c53b43 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchToVariancesUDFTest.java
@@ -22,8 +22,8 @@
import java.util.List;
import java.util.Random;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDFTest.java
index 26b7c99..13d0560 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/ArrayOfDoublesSketchesTTestUDFTest.java
@@ -21,13 +21,12 @@
import java.util.List;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.io.BytesWritable;
import org.testng.Assert;
import org.testng.annotations.Test;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
@SuppressWarnings("javadoc")
public class ArrayOfDoublesSketchesTTestUDFTest {
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAFTest.java
index 3db4a28..9a2e17a 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/DataToArrayOfDoublesSketchUDAFTest.java
@@ -25,10 +25,10 @@
import java.util.List;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
diff --git a/src/test/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAFTest.java
index 87d0a83..5c94869 100644
--- a/src/test/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/tuple/UnionArrayOfDoublesSketchUDAFTest.java
@@ -25,10 +25,10 @@
import java.util.List;
import org.apache.datasketches.hive.common.BytesWritableHelper;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
-import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.parse.SemanticException;