Fixes in preparation for 2.0.0-RC2
diff --git a/src/main/java/org/apache/datasketches/theta/package-info.java b/src/main/java/org/apache/datasketches/theta/package-info.java
index 12c886f..9268109 100644
--- a/src/main/java/org/apache/datasketches/theta/package-info.java
+++ b/src/main/java/org/apache/datasketches/theta/package-info.java
@@ -18,46 +18,12 @@
*/
/**
- * <p>The theta package contains all the sketch classes that are members of the
- * <a href="{@docRoot}/resources/dictionary.html#thetaSketch">Theta Sketch Framework</a>.
- * The basic sketching functionality in this package is also
- * accessible from Hadoop Pig UDFs found in the <i>sketches-pig</i> repository,
+ * <p>The theta package contains all the sketch classes that are members of the
+ * <a href="{@docRoot}/resources/dictionary.html#thetaSketch">Theta Sketch Framework</a>.
+ * The basic sketching functionality in this package is also
+ * accessible from Hadoop Pig UDFs found in the <i>sketches-pig</i> repository,
* and from Hadoop Hive UADFs and UDFs found in the <i>sketches-hive</i> repository.
* </p>
- * <h3>Simple Java Example</h3>
- * Note: The complete example code can be found in the parallel package under src/test/java and
- * with the class name "ExamplesTest.java".
-<pre>
- public void SimpleCountingSketch() {
- int k = 4096;
- int u = 1000000;
-
- UpdateSketch sketch = UpdateSketch.builder().build(k);
- for (int i = 0; i < u; i++) {
- sketch.update(i);
- }
-
- println(sketch.toString());
- }
-
-### HeapQuickSelectSketch SUMMARY:
- Nominal Entries (k) : 4096
- Estimate : 1002714.745231455
- Upper Bound, 95% conf : 1027777.3354974985
- Lower Bound, 95% conf : 978261.4472857157
- p : 1.0
- Theta (double) : 0.00654223948655085
- Theta (long) : 60341508738660257
- Theta (long, hex : 00d66048519437a1
- EstMode? : true
- Empty? : false
- Resize Factor : 8
- Array Size Entries : 8192
- Retained Entries : 6560
- Update Seed : 9001
- Seed Hash : ffff93cc
-### END SKETCH SUMMARY
-</pre>
*
* @author Lee Rhodes
*/
diff --git a/src/main/java/org/apache/datasketches/tuple/Intersection.java b/src/main/java/org/apache/datasketches/tuple/Intersection.java
index c10cad7..1dad7d2 100644
--- a/src/main/java/org/apache/datasketches/tuple/Intersection.java
+++ b/src/main/java/org/apache/datasketches/tuple/Intersection.java
@@ -47,7 +47,7 @@
private final SummarySetOperations<S> summarySetOps_;
private boolean empty_;
private long thetaLong_;
- private HashTables hashTables_;
+ private final HashTables hashTables_;
private boolean firstCall_;
/**
@@ -76,7 +76,7 @@
final int countIn = sketchIn.getRetainedEntries();
thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule
// Empty rule extended in case incoming sketch does not have empty bit properly set
- empty_ |= (countIn == 0) && (thetaLongIn == Long.MAX_VALUE);
+ empty_ |= countIn == 0 && thetaLongIn == Long.MAX_VALUE;
if (countIn == 0) {
hashTables_.clear();
return;
@@ -128,7 +128,7 @@
* Updates the internal set by intersecting it with the given Theta sketch.
* @param sketchIn input Theta Sketch to intersect with the internal state. It may not be null.
* @param summary the given proxy summary for the Theta Sketch, which doesn't have one.
- * It may not be null.
+ * It will be copied for each matching index. It may not be null.
*/
public void update(final org.apache.datasketches.theta.Sketch sketchIn, final S summary) {
if (sketchIn == null) { throw new SketchesArgumentException("Sketch may not be null"); }
@@ -140,7 +140,7 @@
final int countIn = sketchIn.getRetainedEntries(true);
thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule
// Empty rule extended in case incoming sketch does not have empty bit properly set
- empty_ |= (countIn == 0) && (thetaLongIn == Long.MAX_VALUE);
+ empty_ |= countIn == 0 && thetaLongIn == Long.MAX_VALUE;
if (countIn == 0) {
hashTables_.clear();
return;
@@ -180,7 +180,7 @@
matchSummaries = (S[]) Array.newInstance(summaryType, maxMatchSize);
}
matchHashArr[matchCount] = hash;
- matchSummaries[matchCount] = summarySetOps_.intersection(mySummary, (S)mySummary.copy());
+ matchSummaries[matchCount] = summarySetOps_.intersection(mySummary, (S)summary.copy());
matchCount++;
}
hashTables_.fromArrays(matchHashArr, matchSummaries, matchCount);
@@ -207,7 +207,7 @@
int cnt = 0;
for (int i = 0; i < tableSize; i++) {
final long hash = hashTables_.hashTable_[i];
- if ((hash == 0) || (hash > thetaLong_)) { continue; }
+ if (hash == 0 || hash > thetaLong_) { continue; }
final S summary = hashTables_.summaryTable_[i];
if (summaries == null) {
summaries = (S[]) Array.newInstance(summaryType, hashTables_.count_);
diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
index ac5ce93..4ddb71d 100644
--- a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
@@ -27,9 +27,10 @@
/**
* This is to provide a method of updating summaries.
- * This should not be called by the user.
+ * This is primarily used internally.
* @param value update value
+ * @return this
*/
- public void update(U value);
+ UpdatableSummary<U> update(U value);
}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
index ae6b7c0..76c1633 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
@@ -37,7 +37,7 @@
/**
* The aggregation modes for this Summary
*/
- public static enum Mode {
+ public enum Mode {
/**
* The aggregation mode is the summation function.
@@ -97,10 +97,10 @@
}
@Override
- public void update(final Double value) {
+ public DoubleSummary update(final Double value) {
switch (mode_) {
case Sum:
- value_ += value.doubleValue();
+ value_ += value;
break;
case Min:
if (value < value_) { value_ = value; }
@@ -112,6 +112,7 @@
value_ = 1.0;
break;
}
+ return this;
}
@Override
diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
index a0e3e29..3af888b 100644
--- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
@@ -37,7 +37,7 @@
/**
* The aggregation modes for this Summary
*/
- public static enum Mode {
+ public enum Mode {
/**
* The aggregation mode is the summation function.
@@ -96,13 +96,8 @@
}
}
- /**
- * Updates an instance of IntegerSummary with the given value.
- * This should not be called by the user.
- * @param value The given value.
- */
@Override
- public void update(final Integer value) {
+ public IntegerSummary update(final Integer value) {
switch (mode_) {
case Sum:
value_ += value;
@@ -117,6 +112,7 @@
value_ = 1;
break;
}
+ return this;
}
@Override
diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java
index 48cfbc6..75ab751 100644
--- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java
@@ -90,11 +90,11 @@
//From UpdatableSummary
@Override
- public void update(final String[] value) {
+ public ArrayOfStringsSummary update(final String[] value) {
if (nodesArr == null) {
nodesArr = value.clone();
}
- //otherwise do not update.
+ return this;
}
//From Object
@@ -106,7 +106,7 @@
@Override
public boolean equals(final Object summary) {
- if ((summary == null) || !(summary instanceof ArrayOfStringsSummary)) {
+ if (summary == null || !(summary instanceof ArrayOfStringsSummary)) {
return false;
}
final String thatStr = stringConcat(((ArrayOfStringsSummary) summary).nodesArr);
@@ -152,7 +152,7 @@
nodeLengthsArr_[i] = nodeBytesArr_[i].length;
sumNodeBytes += nodeLengthsArr_[i];
}
- totBytes_ = sumNodeBytes + ((numNodes_ + 1) * Integer.BYTES) + 1;
+ totBytes_ = sumNodeBytes + (numNodes_ + 1) * Integer.BYTES + 1;
}
}
diff --git a/src/test/java/org/apache/datasketches/theta/ExamplesTest.java b/src/test/java/org/apache/datasketches/theta/ExamplesTest.java
index 64f618a..d4b3080 100644
--- a/src/test/java/org/apache/datasketches/theta/ExamplesTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ExamplesTest.java
@@ -29,10 +29,10 @@
@Test
public void simpleCountingSketch() {
- int k = 4096;
- int u = 1000000;
+ final int k = 4096;
+ final int u = 1000000;
- UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build();
+ final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build();
for (int i = 0; i < u; i++) {
sketch.update(i);
}
@@ -60,6 +60,57 @@
*/
@Test
+ public void theta2dot0Examples() { //stateless and stateful forms of Union, Intersection and AnotB
+ //Load source sketches: skA receives 1..1000 and skB receives 251..1250, so 750 values are shared
+ final UpdateSketchBuilder bldr = UpdateSketch.builder();
+ final UpdateSketch skA = bldr.build();
+ final UpdateSketch skB = bldr.build();
+ for (int i = 1; i <= 1000; i++) {
+ skA.update(i);
+ skB.update(i + 250);
+ }
+
+ //Union Stateless: one call combines both sketches; all 1250 distinct values are expected
+ Union union = SetOperation.builder().buildUnion();
+ CompactSketch csk = union.union(skA, skB);
+ assert csk.getEstimate() == 1250; //NOTE(review): plain Java asserts only fire when the JVM runs with -ea; confirm the test runner sets it
+
+ //Union Stateful: sketches are fed in one at a time, then the result is requested
+ union = SetOperation.builder().buildUnion();
+ union.update(skA); //first call
+ union.update(skB); //2nd through nth calls
+ //... further update calls could follow here
+ csk = union.getResult();
+ assert csk.getEstimate() == 1250;
+
+ //Intersection Stateless: one call intersects both sketches; the 750 shared values are expected
+ Intersection inter = SetOperation.builder().buildIntersection();
+ csk = inter.intersect(skA, skB);
+ assert csk.getEstimate() == 750;
+
+ //Intersection Stateful: sketches are fed in one at a time, then the result is requested
+ inter = SetOperation.builder().buildIntersection();
+ inter.intersect(skA); //first call
+ inter.intersect(skB); //2nd through nth calls
+ //... further intersect calls could follow here
+ csk = inter.getResult();
+ assert csk.getEstimate() == 750;
+
+ //AnotB Stateless: one call computes A minus B; the 250 values found only in skA are expected
+ AnotB diff = SetOperation.builder().buildANotB();
+ csk = diff.aNotB(skA, skB);
+ assert csk.getEstimate() == 250;
+
+ //AnotB Stateful: A is set first, B sketches are removed one at a time, then the result is requested
+ diff = SetOperation.builder().buildANotB();
+ diff.setA(skA); //first call
+ diff.notB(skB); //2nd through nth calls
+ //... further notB calls could follow here
+ csk = diff.getResult(true);
+ assert csk.getEstimate() == 250;
+ }
+
+ @Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
@@ -67,7 +118,7 @@
/**
* @param s value to print
*/
- static void println(String s) {
+ static void println(final String s) {
//System.out.println(s); //enable/disable here
}
diff --git a/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java b/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java
new file mode 100644
index 0000000..35d67b6
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple;
+
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
+import org.apache.datasketches.tuple.aninteger.IntegerSummary;
+import org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode;
+import org.apache.datasketches.tuple.aninteger.IntegerSummaryFactory;
+import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations;
+import org.testng.annotations.Test;
+
+
+@SuppressWarnings("javadoc")
+public class TupleExamplesTest {
+ private final IntegerSummary.Mode umode = Mode.Sum; //mode of ufactory, which supplies the Union's proxy summary
+ private final IntegerSummary.Mode imode = Mode.AlwaysOne; //mode of ifactory, which supplies the Intersection's proxy summary
+ private final IntegerSummarySetOperations isso = new IntegerSummarySetOperations(umode, imode);
+ private final IntegerSummaryFactory ufactory = new IntegerSummaryFactory(umode);
+ private final IntegerSummaryFactory ifactory = new IntegerSummaryFactory(imode);
+ private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder();
+ private final UpdatableSketchBuilder<Integer, IntegerSummary> tupleBldr =
+ new UpdatableSketchBuilder<>(ufactory);
+
+ /** Runs a Union and then an Intersection, each fed one Tuple sketch and one Theta sketch. */
+ @Test
+ public void tuple2dot0Examples() {
+ //Load source sketches: tupleSk receives keys 1..12 (each with value 1), thetaSk receives 4..15
+ final UpdatableSketch<Integer, IntegerSummary> tupleSk = tupleBldr.build();
+ final UpdateSketch thetaSk = thetaBldr.build();
+ for (int i = 1; i <= 12; i++) {
+ tupleSk.update(i, 1);
+ thetaSk.update(i + 3);
+ }
+
+ //Union: the Theta sketch has no summaries of its own, so a proxy summary updated with 1 goes with it
+ final Union<IntegerSummary> union = new Union<>(isso);
+ union.update(tupleSk);
+ union.update(thetaSk, ufactory.newSummary().update(1));
+ final CompactSketch<IntegerSummary> ucsk = union.getResult();
+ int entries = ucsk.getRetainedEntries();
+ println("Union: " + entries);
+ final SketchIterator<IntegerSummary> uiter = ucsk.iterator();
+ int counter = 1;
+ while (uiter.next()) {
+ final int i = uiter.getSummary().getValue();
+ println(counter++ + ", " + i); //expected: the 9 shared keys (4..12) = 2, the other 6 keys = 1
+ }
+
+ //Intersection: same two inputs, but the proxy summary now comes from ifactory
+ final Intersection<IntegerSummary> inter = new Intersection<>(isso);
+ inter.update(tupleSk);
+ inter.update(thetaSk, ifactory.newSummary().update(1));
+ final CompactSketch<IntegerSummary> icsk = inter.getResult();
+ entries = icsk.getRetainedEntries();
+ println("Intersection: " + entries);
+ final SketchIterator<IntegerSummary> iiter = icsk.iterator();
+ counter = 1;
+ while (iiter.next()) {
+ final int i = iiter.getSummary().getValue();
+ println(counter++ + ", " + i); //expected: the 9 shared keys (4..12) = 1
+ }
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /** Prints the given line when enabled; off by default so the test run stays quiet.
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //enable/disable here
+ }
+}