Merge pull request #485 from apache/remove_duplicate_code
Remove duplicate code
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java
index b8d91fe..443ff4a 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java
@@ -65,6 +65,14 @@
levelsArr = memVal.levelsArr; //always converted to writable form.
}
+ //End of constructors
+
+ @Override
+ String getItemAsString(final int index) {
+ if (isEmpty()) { return "Null"; }
+ return serDe.toString(getTotalItemsArray()[index]);
+ }
+
@Override
public int getK() {
return getMemoryK(mem);
@@ -84,6 +92,12 @@
}
@Override
+ String getMaxItemAsString() {
+ if (isEmpty()) { return "Null"; }
+ return serDe.toString(getMaxItem());
+ }
+
+ @Override
public T getMinItem() {
if (sketchStructure == COMPACT_EMPTY || isEmpty()) {
throw new SketchesArgumentException(EMPTY_MSG);
@@ -97,6 +111,12 @@
}
@Override
+ String getMinItemAsString() {
+ if (isEmpty()) { return "Null"; }
+ return serDe.toString(getMinItem());
+ }
+
+ @Override
public long getN() {
if (sketchStructure == COMPACT_EMPTY) { return 0; }
if (sketchStructure == COMPACT_SINGLE) { return 1; }
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java
index 64e340d..21a4606 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java
@@ -116,7 +116,13 @@
return new KllDirectDoublesSketch(UPDATABLE, wMem, memReqSvr, memVal);
}
- //END of Constructors
+ //End of constructors
+
+ @Override
+ String getItemAsString(final int index) {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(getDoubleItemsArray()[index]);
+ }
@Override
public int getK() {
@@ -138,6 +144,12 @@
}
@Override
+ String getMaxItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(getMaxItem());
+ }
+
+ @Override
public double getMinItem() {
int levelsArrBytes = 0;
if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
@@ -152,6 +164,12 @@
}
@Override
+ String getMinItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(getMinItem());
+ }
+
+ @Override
public long getN() {
if (sketchStructure == COMPACT_EMPTY) { return 0; }
else if (sketchStructure == COMPACT_SINGLE) { return 1; }
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
index c866514..542eda5 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
@@ -119,6 +119,12 @@
//END of Constructors
@Override
+ String getItemAsString(final int index) {
+ if (isEmpty()) { return "NaN"; }
+ return Float.toString(getFloatItemsArray()[index]);
+ }
+
+ @Override
public int getK() {
return getMemoryK(wmem);
}
@@ -138,6 +144,12 @@
}
@Override
+ String getMaxItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Float.toString(getMaxItem());
+ }
+
+ @Override
public float getMinItem() {
int levelsArrBytes = 0;
if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
@@ -152,6 +164,12 @@
}
@Override
+ String getMinItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Float.toString(getMinItem());
+ }
+
+ @Override
public long getN() {
if (sketchStructure == COMPACT_EMPTY) { return 0; }
else if (sketchStructure == COMPACT_SINGLE) { return 1; }
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
index 49bfe22..a3effa9 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
@@ -309,12 +309,16 @@
dblSk.setMinItem(min(dblSk.getMinItem(), item));
dblSk.setMaxItem(max(dblSk.getMaxItem(), item));
}
- if (dblSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(dblSk); }
- final int myLevelsArrAtZero = dblSk.levelsArr[0]; //LevelsArr could be expanded
+ int level0space = dblSk.levelsArr[0];
+ assert (level0space >= 0);
+ if (level0space == 0) {
+ compressWhileUpdatingSketch(dblSk);
+ level0space = dblSk.levelsArr[0];
+ assert (level0space > 0);
+ }
dblSk.incN();
dblSk.setLevelZeroSorted(false);
- final int nextPos = myLevelsArrAtZero - 1;
- assert myLevelsArrAtZero >= 0;
+ final int nextPos = level0space - 1;
dblSk.setLevelsArrayAt(0, nextPos);
dblSk.setDoubleItemsArrayAt(nextPos, item);
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
index 7c17551..f8cd538 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
@@ -307,6 +307,17 @@
}
@Override
+ public String toString(final boolean withSummary, final boolean withData) {
+ KllSketch sketch = this;
+ if (withData && sketchStructure != UPDATABLE) {
+ final Memory mem = getWritableMemory();
+ assert mem != null;
+ sketch = KllDoublesSketch.heapify(getWritableMemory());
+ }
+ return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe());
+ }
+
+ @Override
public void update(final double item) {
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
KllDoublesHelper.updateDouble(this, item);
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
index 9f732e9..d2a604a 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
@@ -309,12 +309,16 @@
fltSk.setMinItem(min(fltSk.getMinItem(), item));
fltSk.setMaxItem(max(fltSk.getMaxItem(), item));
}
- if (fltSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(fltSk); }
- final int myLevelsArrAtZero = fltSk.levelsArr[0]; //LevelsArr could be expanded
+ int level0space = fltSk.levelsArr[0];
+ assert level0space >= 0;
+ if (level0space == 0) {
+ compressWhileUpdatingSketch(fltSk);
+ level0space = fltSk.levelsArr[0];
+ assert (level0space > 0);
+ }
fltSk.incN();
fltSk.setLevelZeroSorted(false);
- final int nextPos = myLevelsArrAtZero - 1;
- assert myLevelsArrAtZero >= 0;
+ final int nextPos = level0space - 1;
fltSk.setLevelsArrayAt(0, nextPos);
fltSk.setFloatItemsArrayAt(nextPos, item);
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
index 5484e8b..613ecaf 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
@@ -307,6 +307,17 @@
}
@Override
+ public String toString(final boolean withSummary, final boolean withData) {
+ KllSketch sketch = this;
+ if (withData && sketchStructure != UPDATABLE) {
+ final Memory mem = getWritableMemory();
+ assert mem != null;
+ sketch = KllFloatsSketch.heapify(getWritableMemory());
+ }
+ return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe());
+ }
+
+ @Override
public void update(final float item) {
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
KllFloatsHelper.updateFloat(this, item);
diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java
index 61d8243..df81a34 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java
@@ -141,6 +141,14 @@
return new KllHeapDoublesSketch(srcMem, memVal);
}
+ //End of constructors
+
+ @Override
+ String getItemAsString(final int index) {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(doubleItems[index]);
+ }
+
@Override
public int getK() { return k; }
@@ -151,12 +159,24 @@
}
@Override
+ String getMaxItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(maxDoubleItem);
+ }
+
+ @Override
public double getMinItem() {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
return minDoubleItem;
}
@Override
+ String getMinItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(minDoubleItem);
+ }
+
+ @Override
public long getN() { return n; }
//restricted
diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
index 0af7312..4728718 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
@@ -141,6 +141,14 @@
return new KllHeapFloatsSketch(srcMem, memVal);
}
+ //End of constructors
+
+ @Override
+ String getItemAsString(final int index) {
+ if (isEmpty()) { return "NaN"; }
+ return Double.toString(floatItems[index]);
+ }
+
@Override
public int getK() { return k; }
@@ -151,12 +159,24 @@
}
@Override
+ String getMaxItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Float.toString(maxFloatItem);
+ }
+
+ @Override
public float getMinItem() {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
return minFloatItem;
}
@Override
+ String getMinItemAsString() {
+ if (isEmpty()) { return "NaN"; }
+ return Float.toString(minFloatItem);
+ }
+
+ @Override
public long getN() { return n; }
//restricted
diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java
index 4b3c6c7..3ed776f 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java
@@ -117,6 +117,14 @@
}
}
+ //End of constructors
+
+ @Override
+ String getItemAsString(final int index) {
+ if (isEmpty()) { return "Null"; }
+ return serDe.toString((T)(itemsArr[index]));
+ }
+
@Override
public int getK() {
return k;
@@ -129,12 +137,24 @@
}
@Override
+ String getMaxItemAsString() {
+ if (isEmpty()) { return "Null"; }
+ return serDe.toString(maxItem);
+ }
+
+ @Override
public T getMinItem() {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
return minItem;
}
@Override
+ String getMinItemAsString() {
+ if (isEmpty()) { return "Null"; }
+ return serDe.toString(minItem);
+ }
+
+ @Override
public long getN() {
return n;
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java
index 6956ccb..da20654 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java
@@ -47,7 +47,6 @@
import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.common.Util;
import org.apache.datasketches.kll.KllSketch.SketchStructure;
import org.apache.datasketches.kll.KllSketch.SketchType;
import org.apache.datasketches.memory.WritableBuffer;
@@ -58,8 +57,14 @@
*
* @author Lee Rhodes
*/
-@SuppressWarnings("unchecked")
final class KllHelper {
+ public static final String LS = System.getProperty("line.separator");
+ static final double EPS_DELTA_THRESHOLD = 1E-6;
+ static final double MIN_EPS = 4.7634E-5;
+ static final double PMF_COEF = 2.446;
+ static final double PMF_EXP = 0.9433;
+ static final double CDF_COEF = 2.296;
+ static final double CDF_EXP = 0.9723;
static class GrowthStats {
SketchType sketchType;
@@ -85,13 +90,6 @@
}
}
- static final double EPS_DELTA_THRESHOLD = 1E-6;
- static final double MIN_EPS = 4.7634E-5;
- static final double PMF_COEF = 2.446;
- static final double PMF_EXP = 0.9433;
- static final double CDF_COEF = 2.296;
- static final double CDF_EXP = 0.9723;
-
/**
* This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index
*/
@@ -313,7 +311,7 @@
assert (k <= (1 << 29));
assert (numLevels >= 1) && (numLevels <= 61);
assert (level >= 0) && (level < numLevels);
- final int depth = numLevels - level - 1;
+ final int depth = numLevels - level - 1; //depth is # levels from the top level (= 0)
return (int) Math.max(m, intCapAux(k, depth));
}
@@ -359,108 +357,44 @@
return newWmem;
}
- private static <T> String outputItemsData(final int numLevels, final int[] levelsArr, final Object[] itemsArr,
- final ArrayOfItemsSerDe<T> serDe) {
+ private static String outputData(final KllSketch sketch) {
+ final int[] levelsArr = sketch.getLevelsArray(SketchStructure.UPDATABLE);
+ final int numLevels = sketch.getNumLevels();
+ final int k = sketch.getK();
+ final int m = sketch.getM();
final StringBuilder sb = new StringBuilder();
- sb.append("### KLL items data {index, item}:").append(Util.LS);
+ sb.append("### KllSketch itemsArray & levelsArray data:").append(LS);
+ sb.append("Index, Value").append(LS);
if (levelsArr[0] > 0) {
- sb.append(" Empty/Garbage:" + Util.LS);
+ final String gbg = " Empty or Garbage, size = " + levelsArr[0];
for (int i = 0; i < levelsArr[0]; i++) {
- sb.append(" ").append(i + ", ").append(serDe.toString((T)itemsArr[i])).append(Util.LS);
+ sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i));
+ if (i == 0) { sb.append(gbg); }
+ sb.append(LS);
}
}
int level = 0;
while (level < numLevels) {
final int fromIndex = levelsArr[level];
final int toIndex = levelsArr[level + 1]; // exclusive
+ String lvlData = "";
if (fromIndex < toIndex) {
- sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level));
- sb.append(Util.LS);
+ lvlData = " level[" + level + "]=" + levelsArr[level]
+ + ", cap=" + KllHelper.levelCapacity(k, numLevels, level, m)
+ + ", size=" + KllHelper.currentLevelSizeItems(level, numLevels, levelsArr)
+ + ", wt=" + (1 << level) + LS;
}
for (int i = fromIndex; i < toIndex; i++) {
- sb.append(" ").append(i + ", ").append(serDe.toString((T)itemsArr[i])).append(Util.LS);
+ sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i));
+ if (i == fromIndex) { sb.append(lvlData); } else { sb.append(LS); }
}
level++;
}
- sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)");
- sb.append(Util.LS);
- sb.append("### End items data").append(Util.LS);
- return sb.toString();
+ sb.append(" ----------level[" + level + "]=" + levelsArr[level] + ": itemsArray[].length");
+ sb.append(LS);
+ sb.append("### End data").append(LS);
- }
-
- private static String outputDoublesData(final int numLevels, final int[] levelsArr, final double[] doubleItemsArr) {
- final StringBuilder sb = new StringBuilder();
- sb.append("### KLL items data {index, item}:").append(Util.LS);
- if (levelsArr[0] > 0) {
- sb.append(" Empty/Garbage:" + Util.LS);
- for (int i = 0; i < levelsArr[0]; i++) {
- sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS);
- }
- }
- int level = 0;
- while (level < numLevels) {
- final int fromIndex = levelsArr[level];
- final int toIndex = levelsArr[level + 1]; // exclusive
- if (fromIndex < toIndex) {
- sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level));
- sb.append(Util.LS);
- }
-
- for (int i = fromIndex; i < toIndex; i++) {
- sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS);
- }
- level++;
- }
- sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)");
- sb.append(Util.LS);
- sb.append("### End items data").append(Util.LS);
- return sb.toString();
- }
-
- private static String outputFloatsData(final int numLevels, final int[] levelsArr, final float[] floatsItemsArr) {
- final StringBuilder sb = new StringBuilder();
- sb.append("### KLL items data {index, item}:").append(Util.LS);
- if (levelsArr[0] > 0) {
- sb.append(" Empty/Garbage:" + Util.LS);
- for (int i = 0; i < levelsArr[0]; i++) {
- sb.append(" ").append(i + ", ").append(floatsItemsArr[i]).append(Util.LS);
- }
- }
- int level = 0;
- while (level < numLevels) {
- final int fromIndex = levelsArr[level];
- final int toIndex = levelsArr[level + 1]; // exclusive
- if (fromIndex < toIndex) {
- sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level));
- sb.append(Util.LS);
- }
-
- for (int i = fromIndex; i < toIndex; i++) {
- sb.append(" ").append(i + ", ").append(floatsItemsArr[i]).append(Util.LS);
- }
- level++;
- }
- sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)");
- sb.append(Util.LS);
- sb.append("### End items data").append(Util.LS);
- return sb.toString();
- }
-
- static String outputLevels(final int k, final int m, final int numLevels, final int[] levelsArr) {
- final StringBuilder sb = new StringBuilder();
- sb.append("### KLL levels array:").append(Util.LS)
- .append(" level, offset: nominal capacity, actual size").append(Util.LS);
- int level = 0;
- for ( ; level < numLevels; level++) {
- sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ")
- .append(KllHelper.levelCapacity(k, numLevels, level, m))
- .append(", ").append(KllHelper.currentLevelSizeItems(level, numLevels, levelsArr)).append(Util.LS);
- }
- sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)")
- .append(Util.LS);
- sb.append("### End levels array").append(Util.LS);
return sb.toString();
}
@@ -541,7 +475,7 @@
return bytesOut;
}
- static <T> String toStringImpl(final KllSketch sketch, final boolean withLevels, final boolean withData,
+ static <T> String toStringImpl(final KllSketch sketch, final boolean withSummary, final boolean withData,
final ArrayOfItemsSerDe<T> serDe) {
final SketchType sketchType = sketch.sketchType;
final boolean hasMemory = sketch.hasMemory();
@@ -562,70 +496,36 @@
final String skTypeStr = sketchType.getName();
final String className = "Kll" + directStr + compactStr + skTypeStr;
- sb.append(Util.LS).append("### ").append(className).append(" Summary:").append(Util.LS);
- sb.append(" K : ").append(k).append(Util.LS);
- sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(Util.LS);
- sb.append(" M : ").append(m).append(Util.LS);
- sb.append(" N : ").append(n).append(Util.LS);
- sb.append(" Epsilon : ").append(epsPct).append(Util.LS);
- sb.append(" Epsilon PMF : ").append(epsPMFPct).append(Util.LS);
- sb.append(" Empty : ").append(sketch.isEmpty()).append(Util.LS);
- sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(Util.LS);
- sb.append(" Levels : ").append(numLevels).append(Util.LS);
- sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(Util.LS);
- sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(Util.LS);
- sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(Util.LS);
- sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(Util.LS);
- sb.append(" ReadOnly : ").append(readOnlyStr).append(Util.LS);
+ sb.append(LS).append("### ").append(className).append(" Summary:").append(LS);
+ sb.append(" K : ").append(k).append(LS);
+ sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(LS);
+ sb.append(" M : ").append(m).append(LS);
+ sb.append(" N : ").append(n).append(LS);
+ sb.append(" Epsilon : ").append(epsPct).append(LS);
+ sb.append(" Epsilon PMF : ").append(epsPMFPct).append(LS);
+ sb.append(" Empty : ").append(sketch.isEmpty()).append(LS);
+ sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(LS);
+ sb.append(" Levels : ").append(numLevels).append(LS);
+ sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(LS);
+ sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(LS);
+ sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(LS);
+ sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(LS);
+ sb.append(" ReadOnly : ").append(readOnlyStr).append(LS);
if (sketchType != ITEMS_SKETCH) {
- sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true))
- .append(Util.LS);
+ sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)).append(LS);
}
- sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false))
- .append(Util.LS);
+ sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false)).append(LS);
- if (sketchType == DOUBLES_SKETCH) {
- final KllDoublesSketch dblSk = (KllDoublesSketch) sketch;
- sb.append(" Min Item : ").append(dblSk.isEmpty() ? Double.NaN : dblSk.getMinItem())
- .append(Util.LS);
- sb.append(" Max Item : ").append(dblSk.isEmpty() ? Double.NaN : dblSk.getMaxItem())
- .append(Util.LS);
- }
- else if (sketchType == FLOATS_SKETCH) {
- final KllFloatsSketch fltSk = (KllFloatsSketch) sketch;
- sb.append(" Min Item : ").append(fltSk.isEmpty() ? Float.NaN : fltSk.getMinItem())
- .append(Util.LS);
- sb.append(" Max Item : ").append(fltSk.isEmpty() ? Float.NaN : fltSk.getMaxItem())
- .append(Util.LS);
- }
- else { //sketchType == ITEMS_SKETCH
- final KllItemsSketch<T> itmSk = (KllItemsSketch<T>) sketch;
- sb.append(" Min Item : ").append(itmSk.isEmpty() ? "null" : serDe.toString(itmSk.getMinItem()))
- .append(Util.LS);
- sb.append(" Max Item : ").append(itmSk.isEmpty() ? "null" : serDe.toString(itmSk.getMaxItem()))
- .append(Util.LS);
- }
- sb.append("### End sketch summary").append(Util.LS);
+ final String emptyStr = (sketchType == ITEMS_SKETCH) ? "Null" : "NaN";
- if (withLevels) {
- sb.append(outputLevels(k, m, numLevels, fullLevelsArr));
- }
- if (withData) {
- if (sketchType == DOUBLES_SKETCH) {
- final KllDoublesSketch dblSk = (KllDoublesSketch) sketch;
- final double[] myDoubleItemsArr = dblSk.getDoubleItemsArray();
- sb.append(outputDoublesData(numLevels, fullLevelsArr, myDoubleItemsArr));
- } else if (sketchType == FLOATS_SKETCH) {
- final KllFloatsSketch fltSk = (KllFloatsSketch) sketch;
- final float[] myFloatItemsArr = fltSk.getFloatItemsArray();
- sb.append(outputFloatsData(numLevels, fullLevelsArr, myFloatItemsArr));
- }
- else { //sketchType == KllItemsSketch
- final KllItemsSketch<T> itmSk = (KllItemsSketch<T>) sketch;
- final T[] myItemsArr = itmSk.getTotalItemsArray();
- sb.append(outputItemsData(numLevels, fullLevelsArr, myItemsArr, serDe));
- }
- }
+ sb.append(" Min Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMinItemAsString())
+ .append(LS);
+ sb.append(" Max Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMaxItemAsString())
+ .append(LS);
+ sb.append("### End sketch summary").append(LS);
+
+ if (! withSummary) { sb.setLength(0); }
+ if (withData) { sb.append(outputData(sketch)); }
return sb.toString();
}
@@ -789,8 +689,9 @@
/**
* Computes the actual item capacity of a given level given its depth index.
* If the depth of levels exceeds 30, this uses a folding technique to accurately compute the
- * actual level capacity up to a depth of 60. Without folding, the internal calculations would
- * exceed the capacity of a long.
+ * actual level capacity up to a depth of 60 (or 61 levels).
+ * Without folding, the internal calculations would exceed the capacity of a long.
+ * This method just decides whether folding is required or not.
* @param k the configured k of the sketch
* @param depth the zero-based index of the level being computed.
* @return the actual capacity of a given level given its depth index.
@@ -806,31 +707,32 @@
/**
* Performs the integer based calculation of an individual level (or folded level).
* @param k the configured k of the sketch
- * @param depth depth the zero-based index of the level being computed.
+ * @param depth the zero-based index of the level being computed. The max depth is 30!
* @return the actual capacity of a given level given its depth index.
*/
private static long intCapAuxAux(final long k, final int depth) {
- final long twok = k << 1; // for rounding pre-multiply by 2
- final long tmp = ((twok << depth) / powersOfThree[depth]);
- final long result = ((tmp + 1L) >>> 1); // add 1 and divide by 2
+ final long twok = k << 1; // for rounding at the end, pre-multiply by 2 here, divide by 2 during rounding.
+ final long tmp = ((twok << depth) / powersOfThree[depth]); //2k* (2/3)^depth. 2k also keeps the fraction larger.
+ final long result = ((tmp + 1L) >>> 1); // (tmp + 1)/2. If odd, round up. This guarantees an integer.
assert (result <= k);
return result;
}
+ private final static boolean enablePrinting = false;
+
/**
- * @param fmt format
- * @param args arguments
+ * @param format the format
+ * @param args the args
*/
- private static void printf(final String fmt, final Object ... args) {
- //System.out.printf(fmt, args); //Disable
+ private static final void printf(final String format, final Object ... args) {
+ if (enablePrinting) { System.out.printf(format, args); }
}
/**
- * Println Object o
- * @param o object to print
+ * @param o the Object to println
*/
- private static void println(final Object o) {
- //System.out.println(o.toString()); //Disable
+ private static final void println(final Object o) {
+ if (enablePrinting) { System.out.println(o.toString()); }
}
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
index 03bc931..502d432 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
@@ -305,12 +305,16 @@
itmSk.setMinItem(Util.minT(itmSk.getMinItem(), item, comp));
itmSk.setMaxItem(Util.maxT(itmSk.getMaxItem(), item, comp));
}
- if (itmSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(itmSk); }
- final int myLevelsArrAtZero = itmSk.levelsArr[0]; //LevelsArr could be expanded
+ int level0space = itmSk.levelsArr[0];
+ assert level0space >= 0;
+ if (level0space == 0) {
+ compressWhileUpdatingSketch(itmSk);
+ level0space = itmSk.levelsArr[0];
+ assert (level0space > 0);
+ }
itmSk.incN();
itmSk.setLevelZeroSorted(false);
- final int nextPos = myLevelsArrAtZero - 1;
- assert myLevelsArrAtZero >= 0;
+ final int nextPos = level0space - 1;
itmSk.setLevelsArrayAt(0, nextPos);
itmSk.setItemsArrayAt(nextPos, item);
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
index f0e923f..589c1fa 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
@@ -276,6 +276,17 @@
}
@Override
+ public String toString(final boolean withSummary, final boolean withData) {
+ KllSketch sketch = this;
+ if (withData && sketchStructure != UPDATABLE) {
+ final Memory mem = getWritableMemory();
+ assert mem != null;
+ sketch = KllItemsSketch.heapify((Memory)getWritableMemory(), comparator, serDe);
+ }
+ return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe());
+ }
+
+ @Override
public void update(final T item) {
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
KllItemsHelper.updateItem(this, item, comparator);
diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java
index 874d64f..67f6bb9 100644
--- a/src/main/java/org/apache/datasketches/kll/KllSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java
@@ -131,6 +131,13 @@
}
/**
+ * Gets the string value of the item at the given index.
+ * @param index the index of the value
+ * @return the string value of the item at the given index.
+ */
+ abstract String getItemAsString(int index);
+
+ /**
* Gets the approximate <em>k</em> to use given epsilon, the normalized rank error.
* @param epsilon the normalized rank error between zero and one.
* @param pmf if true, this function returns the <em>k</em> assuming the input epsilon
@@ -160,6 +167,18 @@
}
/**
+ * Gets the string value of the max item
+ * @return the string value of the max item
+ */
+ abstract String getMaxItemAsString();
+
+ /**
+ * Gets the string value of the min item
+ * @return the string value of the min item
+ */
+ abstract String getMinItemAsString();
+
+ /**
* Gets the normalized rank error given k and pmf.
* Static method version of the <i>getNormalizedRankError(boolean)</i>.
* The epsilon returned is a best fit to 99 percent confidence empirically measured max error
@@ -262,17 +281,17 @@
@Override
public final String toString() {
- return toString(false, false);
+ return toString(true, false);
}
/**
* Returns a summary of the sketch as a string.
- * @param withLevels if true include information about levels
+ * @param withSummary if true includes sketch summary information
* @param withData if true include sketch data
* @return string representation of sketch summary
*/
- public String toString(final boolean withLevels, final boolean withData) {
- return KllHelper.toStringImpl(this, withLevels, withData, getSerDe());
+ public String toString(final boolean withSummary, final boolean withData) {
+ return KllHelper.toStringImpl(this, withSummary, withData, getSerDe());
}
//restricted
diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java b/src/main/java/org/apache/datasketches/partitions/Partitioner.java
index be256e4..9bc3eee 100644
--- a/src/main/java/org/apache/datasketches/partitions/Partitioner.java
+++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java
@@ -25,6 +25,7 @@
import static java.lang.Math.min;
import static java.lang.Math.pow;
import static java.lang.Math.round;
+import static java.util.Collections.unmodifiableList;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
@@ -116,7 +117,7 @@
final StackElement<T> se = new StackElement<>(gpb, 0, "1");
stack.push(se);
partitionSearch(stack);
- return finalPartitionList;
+ return unmodifiableList(finalPartitionList);
}
private void partitionSearch(final ArrayDeque<StackElement<T>> stack) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
index 4db8514..5c0098a 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
@@ -48,7 +48,7 @@
final T minItem,
final QuantileSearchCriteria searchCrit) {
this.totalN = totalN;
- this.boundaries = boundaries;
+ this.boundaries = boundaries; //SpotBugs copy
this.natRanks = natRanks;
this.normRanks = normRanks;
this.maxItem = maxItem;
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java
index 31a5bed..2134840 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java
@@ -75,8 +75,8 @@
double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit);
/**
- * Returns the maximum item of the stream. This is provided for convenience, but may be different from the largest
- * item retained by the sketch algorithm.
+ * Returns the maximum item of the stream. This is provided for convenience and may be different from the
+ * item returned by <i>getQuantile(1.0)</i>.
*
* @return the maximum item of the stream
* @throws IllegalArgumentException if sketch is empty.
@@ -84,8 +84,8 @@
double getMaxItem();
/**
- * Returns the minimum item of the stream. This is provided for convenience, but is distinct from the smallest
- * item retained by the sketch algorithm.
+ * Returns the minimum item of the stream. This is provided for convenience and may be different from the
+ * item returned by <i>getQuantile(0.0)</i>.
*
* @return the minimum item of the stream
* @throws IllegalArgumentException if sketch is empty.
@@ -297,39 +297,4 @@
*/
void update(double item);
- /**
- * This encapsulates the essential information needed to construct actual partitions and is returned from the
- * <i>getPartitionBoundaries(int, QuantileSearchCritera)</i> method.
- */
- static class DoublesPartitionBoundaries {
-
- /**
- * The total number of items presented to the sketch.
- *
- * <p>To compute the weight or density of a specific
- * partition <i>i</i> where <i>i</i> varies from 1 to <i>m</i> partitions:
- * <pre>{@code
- * long N = getN();
- * double[] ranks = getRanks();
- * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
- * }</pre>
- */
- public long N;
-
- /**
- * The normalized ranks that correspond to the returned boundaries.
- * The returned array is of size <i>(m + 1)</i>, where <i>m</i> is the requested number of partitions.
- * Index 0 of the returned array is always 0.0, and index <i>m</i> is always 1.0.
- */
- public double[] ranks;
-
- /**
- * The partition boundaries as quantiles.
- * The returned array is of size <i>(m + 1)</i>, where <i>m</i> is the requested number of partitions.
- * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index <i>m</i> is always
- * {@link #getMaxItem() getMaxItem()}.
- */
- public double[] boundaries;
- }
}
-
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
index 2fcbdd9..9867804 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
@@ -296,40 +296,5 @@
*/
void update(float item);
- /**
- * This encapsulates the essential information needed to construct actual partitions and is returned from the
- * <i>getPartitionBoundaries(int, QuantileSearchCritera)</i> method.
- */
-
- static class FloatsPartitionBoundaries {
-
- /**
- * The total number of items presented to the sketch.
- *
- * <p>To compute the weight or density of a specific
- * partition <i>i</i> where <i>i</i> varies from 1 to <i>m</i> partitions:
- * <pre>{@code
- * long N = getN();
- * double[] ranks = getRanks();
- * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
- * }</pre>
- */
- public long N;
-
- /**
- * The normalized ranks that correspond to the returned boundaries.
- * The returned array is of size <i>(m + 1)</i>, where <i>m</i> is the requested number of partitions.
- * Index 0 of the returned array is always 0.0, and index <i>m</i> is always 1.0.
- */
- public double[] ranks;
-
- /**
- * The partition boundaries as quantiles.
- * The returned array is of size <i>(m + 1)</i>, where <i>m</i> is the requested number of partitions.
- * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index <i>m</i> is always
- * {@link #getMaxItem() getMaxItem()}.
- */
- public float[] boundaries;
- }
}
diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java
index fe20808..79f2b5e 100644
--- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.kll;
+import static org.apache.datasketches.kll.KllHelper.getGrowthSchemeForGivenN;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
@@ -27,6 +28,7 @@
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.kll.KllDirectDoublesSketch.KllDirectCompactDoublesSketch;
+import org.apache.datasketches.kll.KllSketch.SketchType;
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.MemoryRequestServer;
@@ -38,6 +40,7 @@
/**
* @author Lee Rhodes
*/
+@SuppressWarnings("unused")
public class KllMiscDoublesTest {
static final String LS = System.getProperty("line.separator");
private final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer();
@@ -168,21 +171,85 @@
public void viewHeapCompactions() {
int k = 20;
int n = 108;
+ boolean withSummary = false;
+ boolean withData = true;
int compaction = 0;
- KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k);
+ WritableMemory wmem = WritableMemory.allocate(1 << 20);
+ MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer();
+ KllDoublesSketch sk = KllDoublesSketch.newDirectInstance(k, wmem, memReqSvr);
for (int i = 1; i <= n; i++) {
sk.update(i);
if (sk.levelsArr[0] == 0) {
println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>");
- println(sk.toString(true, true));
+ println(sk.toString(withSummary, withData));
sk.update(++i);
println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>");
- println(sk.toString(true, true));
+ println(sk.toString(withSummary, withData));
assertEquals(sk.getDoubleItemsArray()[sk.levelsArr[0]], i);
}
}
+ println(LS + "#<<< END STATE # >>>");
+ println(sk.toString(withSummary, withData));
+ println("");
}
+ @Test
+ public void viewCompactSketchData() {
+ int k = 20;
+ int n = 109;
+ boolean withSummary = true;
+ boolean withData = true;
+ KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k);
+ for (int i = 1; i <= n; i++) { sk.update(i); }
+ byte[] byteArr = sk.toByteArray();
+ Memory mem = Memory.wrap(byteArr);
+ KllDoublesSketch ddSk = KllDoublesSketch.wrap(mem);
+ println(ddSk.toString(withSummary, withData));
+ }
+
+ //@Test //set static enablePrinting = true for visual checking
+ // // must also make KllHelper.intCapAux(...) visible
+ // public void checkIntCapAux() {
+ // String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"};
+ // String hdrFmt = "%6s %6s %28s %10s %10s %34s\n";
+ // String dataFmt = "%6d %6d %,28d %,10d %,10d %,34.0f\n";
+ // int k = 1000;
+ // int m = 8;
+ // int numLevels = 20;
+ // println("k=" + k + ", m=" + m + ", numLevels=" + numLevels);
+ // printf(hdrFmt, (Object[]) hdr);
+ // double maxN = 0;
+ // for (int i = 0; i < numLevels; i++) {
+ // int depth = numLevels - i - 1;
+ // long cap = KllHelper.intCapAux(k, depth);
+ // long end = Math.max(m, cap);
+ // long wt = 1L << i;
+ // maxN += (double)wt * (double)end;
+ // printf(dataFmt, i, depth, wt, cap, end, maxN);
+ // }
+ // }
+
+ //@Test //set static enablePrinting = true for visual checking
+ // // must also make KllHelper.powersOfThree visible
+ // public void checkIntCapAuxAux() {
+ // String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"};
+ // String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s\n";
+ // String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d\n";
+ // long k = (1L << 16) - 1L;
+ // long m = 8;
+ // println("k = " + k + ", m = " + m);
+ // printf(hdrFmt, (Object[]) hdr);
+ // for (int i = 0; i < 31; i++) {
+ // long twoK = k << 1;
+ // long twoKxtwoD = twoK << i;
+ // long threeToD = KllHelper.powersOfThree[i];
+ // long tmp = twoKxtwoD / threeToD;
+ // long result = (tmp + 1L) >>> 1;
+ // long end = Math.max(m, result); //performed later
+ // printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end);
+ // }
+ // }
+
@Test //set static enablePrinting = true for visual checking
public void viewDirectCompactions() {
int k = 20;
diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml
index 6ce8f01..62cf08f 100644
--- a/tools/FindBugsExcludeFilter.xml
+++ b/tools/FindBugsExcludeFilter.xml
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -23,6 +24,10 @@
<Match>
<Class name="~.*\.*Test" />
</Match>
+
+ <Match>
+ <Class name="~.*\.*TestUtil" />
+ </Match>
<!-- Harmless, Too many False Positives May 2, 2023-->
<Match>
@@ -45,7 +50,32 @@
</Match>
<Match>
+ <Bug pattern="EI_EXPOSE_REP"/>
+ <Class name="org.apache.datasketches.quantilescommon.GenericPartitionBoundaries"/>
+ </Match>
+
+ <Match>
<Bug pattern="EI_EXPOSE_REP2"/>
+ <Class name="org.apache.datasketches.quantilescommon.FloatsSortedViewIterator"/>
+ </Match>
+
+ <Match>
+ <Bug pattern="EI_EXPOSE_REP2"/>
+ <Class name="org.apache.datasketches.quantilescommon.DoublesSortedViewIterator"/>
+ </Match>
+
+ <Match>
+ <Bug pattern="EI_EXPOSE_REP2"/>
+ <Class name="org.apache.datasketches.quantilescommon.GenericSortedViewIterator"/>
+ </Match>
+
+ <Match>
+ <Bug pattern="EI_EXPOSE_REP2"/>
+ <Class name="org.apache.datasketches.quantilescommon.GenericPartitionBoundaries"/>
+ </Match>
+
+ <Match> <!-- False Positive: Inside scope of TWR -->
+ <Bug pattern="OBL_UNSATISFIED_OBLIGATION" />
<Class name="org.apache.datasketches.quantilescommon.GenericSortedViewIterator"/>
</Match>