This replicates the recent additions from KllDoubles to KllFloats.
Specifically weighted updates and vector updates.
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
index c9be068..a61d19f 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
@@ -116,7 +116,7 @@
return new KllDirectFloatsSketch(UPDATABLE, wMem, memReqSvr, memVal);
}
- //END of Constructors
+ //End of Constructors
@Override
String getItemAsString(final int index) {
@@ -129,54 +129,80 @@
return getMemoryK(wmem);
}
+ //MinMax Methods
+
@Override
public float getMaxItem() {
- int levelsArrBytes = 0;
if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
- else if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); }
- else if (sketchStructure == COMPACT_FULL) {
- levelsArrBytes = getLevelsArrSizeBytes(COMPACT_FULL);
- } else { //UPDATABLE
- levelsArrBytes = getLevelsArrSizeBytes(UPDATABLE);
- }
- final int offset = DATA_START_ADR + levelsArrBytes + ITEM_BYTES;
+ if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); }
+ //either compact-full or updatable
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES;
return wmem.getFloat(offset);
}
@Override
+ float getMaxItemInternal() {
+ if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Float.NaN; }
+ if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); }
+ //either compact-full or updatable
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES;
+ return wmem.getFloat(offset);
+ }
+
+ @Override
String getMaxItemAsString() {
- if (isEmpty()) { return "NaN"; }
- return Float.toString(getMaxItem());
+ final float maxItem = getMaxItemInternal();
+ return Float.toString(maxItem);
}
@Override
public float getMinItem() {
- int levelsArrBytes = 0;
if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
- else if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); }
- else if (sketchStructure == COMPACT_FULL) {
- levelsArrBytes = getLevelsArrSizeBytes(COMPACT_FULL);
- } else { //UPDATABLE
- levelsArrBytes = getLevelsArrSizeBytes(UPDATABLE);
- }
- final int offset = DATA_START_ADR + levelsArrBytes;
+ if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); }
+ //either compact-full or updatable
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure);
+ return wmem.getFloat(offset);
+ }
+
+ @Override
+ float getMinItemInternal() {
+ if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Float.NaN; }
+ if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); }
+ //either compact-full or updatable
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure);
return wmem.getFloat(offset);
}
@Override
String getMinItemAsString() {
- if (isEmpty()) { return "NaN"; }
- return Float.toString(getMinItem());
+ final float minItem = getMinItemInternal();
+ return Float.toString(minItem);
}
@Override
+ void setMaxItem(final float item) {
+ if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES;
+ wmem.putFloat(offset, item);
+ }
+
+ @Override
+ void setMinItem(final float item) {
+ if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure);
+ wmem.putFloat(offset, item);
+ }
+
+ //END MinMax Methods
+
+ @Override
public long getN() {
if (sketchStructure == COMPACT_EMPTY) { return 0; }
else if (sketchStructure == COMPACT_SINGLE) { return 1; }
else { return getMemoryN(wmem); }
}
- //restricted
+ //other restricted
@Override //returns updatable, expanded array including free space at bottom
float[] getFloatItemsArray() {
@@ -317,26 +343,19 @@
}
@Override
+ void setFloatItemsArrayAt(final int index, final float[] items, final int srcOffset, final int length) {
+ if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+ final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (index + 2) * ITEM_BYTES;
+ wmem.putFloatArray(offset, items, srcOffset, length);
+ }
+
+ @Override
void setLevelZeroSorted(final boolean sorted) {
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
setMemoryLevelZeroSortedFlag(wmem, sorted);
}
@Override
- void setMaxItem(final float item) {
- if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
- final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES;
- wmem.putFloat(offset, item);
- }
-
- @Override
- void setMinItem(final float item) {
- if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
- final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure);
- wmem.putFloat(offset, item);
- }
-
- @Override
void setMinK(final int minK) {
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
setMemoryMinK(wmem, minK);
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
index 5bfaf5d..67035b4 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
@@ -30,8 +30,6 @@
import org.apache.datasketches.memory.WritableMemory;
-//
-//
/**
* Static methods to support KllDoublesSketch
* @author Kevin Lang
@@ -453,14 +451,14 @@
workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop;
assert selfPop >= 0 && otherPop >= 0;
if (selfPop == 0 && otherPop == 0) { continue; }
- else if (selfPop > 0 && otherPop == 0) {
+ if (selfPop > 0 && otherPop == 0) {
System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop);
}
else if (selfPop == 0 && otherPop > 0) {
System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop);
}
else if (selfPop > 0 && otherPop > 0) {
- mergeSortedDoubleArrays( //only workbuf is modified
+ mergeSortedDoubleArrays( //only workBuf is modified
myCurDoubleItemsArr, myCurLevelsArr[lvl], selfPop,
otherDoubleItemsArr, otherLevelsArr[lvl], otherPop,
workBuf, workLevels[lvl]);
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
index 1a71c34..50cadeb 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
@@ -24,15 +24,12 @@
import static org.apache.datasketches.common.Util.isEven;
import static org.apache.datasketches.common.Util.isOdd;
import static org.apache.datasketches.kll.KllHelper.findLevelToCompact;
-import static org.apache.datasketches.kll.KllSketch.DEFAULT_M;
import java.util.Arrays;
import java.util.Random;
import org.apache.datasketches.memory.WritableMemory;
-//
-//
/**
* Static methods to support KllFloatsSketch
* @author Kevin Lang
@@ -59,7 +56,7 @@
* It cannot be used while merging, while reducing k, or anything else.
* @param fltSk the current KllFloatsSketch
*/
- private static void compressWhileUpdatingSketch(final KllFloatsSketch fltSk) {
+ static void compressWhileUpdatingSketch(final KllFloatsSketch fltSk) {
final int level =
findLevelToCompact(fltSk.getK(), fltSk.getM(), fltSk.getNumLevels(), fltSk.levelsArr);
if (level == fltSk.getNumLevels() - 1) {
@@ -128,8 +125,8 @@
//capture my key mutable fields before doing any merging
final boolean myEmpty = mySketch.isEmpty();
- final float myMin = myEmpty ? Float.NaN : mySketch.getMinItem();
- final float myMax = myEmpty ? Float.NaN : mySketch.getMaxItem();
+ final float myMin = mySketch.getMinItemInternal();
+ final float myMax = mySketch.getMaxItemInternal();
final int myMinK = mySketch.getMinK();
final long finalN = Math.addExact(mySketch.getN(), otherFltSk.getN());
@@ -140,12 +137,12 @@
//MERGE: update this sketch with level0 items from the other sketch
if (otherFltSk.isCompactSingleItem()) {
- updateFloat(mySketch, otherFltSk.getFloatSingleItem());
+ KllFloatsSketch.updateFloat(mySketch, otherFltSk.getFloatSingleItem());
otherFloatItemsArr = new float[0];
} else {
otherFloatItemsArr = otherFltSk.getFloatItemsArray();
for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) {
- updateFloat(mySketch, otherFloatItemsArr[i]);
+ KllFloatsSketch.updateFloat(mySketch, otherFloatItemsArr[i]);
}
}
@@ -313,35 +310,6 @@
}
}
- //Called from KllFloatsSketch::update and merge
- static void updateFloat(final KllFloatsSketch fltSk, final float item) {
- fltSk.updateMinMax(item);
- int freeSpace = fltSk.levelsArr[0];
- assert freeSpace >= 0;
- if (freeSpace == 0) {
- compressWhileUpdatingSketch(fltSk);
- freeSpace = fltSk.levelsArr[0];
- assert (freeSpace > 0);
- }
- fltSk.incN(1);
- fltSk.setLevelZeroSorted(false);
- final int nextPos = freeSpace - 1;
- fltSk.setLevelsArrayAt(0, nextPos);
- fltSk.setFloatItemsArrayAt(nextPos, item);
- }
-
- //Called from KllFloatsSketch::update with weight
- static void updateFloat(final KllFloatsSketch fltSk, final float item, final long weight) {
- if (weight < fltSk.levelsArr[0]) {
- for (int i = 0; i < (int)weight; i++) { updateFloat(fltSk, item); }
- } else {
- fltSk.updateMinMax(item);
- final KllHeapFloatsSketch tmpSk = new KllHeapFloatsSketch(fltSk.getK(), DEFAULT_M, item, weight);
-
- fltSk.merge(tmpSk);
- }
- }
-
/**
* Compression algorithm used to merge higher levels.
* <p>Here is what we do for each level:</p>
@@ -465,35 +433,35 @@
}
private static void populateFloatWorkArrays( //workBuf and workLevels are modified
- final float[] workbuf, final int[] worklevels, final int provisionalNumLevels,
+ final float[] workBuf, final int[] workLevels, final int provisionalNumLevels,
final int myCurNumLevels, final int[] myCurLevelsArr, final float[] myCurFloatItemsArr,
final int otherNumLevels, final int[] otherLevelsArr, final float[] otherFloatItemsArr) {
- worklevels[0] = 0;
+ workLevels[0] = 0;
// Note: the level zero data from "other" was already inserted into "self".
// This copies into workbuf.
final int selfPopZero = KllHelper.currentLevelSizeItems(0, myCurNumLevels, myCurLevelsArr);
- System.arraycopy( myCurFloatItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero);
- worklevels[1] = worklevels[0] + selfPopZero;
+ System.arraycopy( myCurFloatItemsArr, myCurLevelsArr[0], workBuf, workLevels[0], selfPopZero);
+ workLevels[1] = workLevels[0] + selfPopZero;
for (int lvl = 1; lvl < provisionalNumLevels; lvl++) {
final int selfPop = KllHelper.currentLevelSizeItems(lvl, myCurNumLevels, myCurLevelsArr);
final int otherPop = KllHelper.currentLevelSizeItems(lvl, otherNumLevels, otherLevelsArr);
- worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop;
+ workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop;
assert selfPop >= 0 && otherPop >= 0;
if (selfPop == 0 && otherPop == 0) { continue; }
if (selfPop > 0 && otherPop == 0) {
- System.arraycopy(myCurFloatItemsArr, myCurLevelsArr[lvl], workbuf, worklevels[lvl], selfPop);
+ System.arraycopy(myCurFloatItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop);
}
else if (selfPop == 0 && otherPop > 0) {
- System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop);
+ System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop);
}
else if (selfPop > 0 && otherPop > 0) {
- mergeSortedFloatArrays(
+ mergeSortedFloatArrays( //only workBuf is modified
myCurFloatItemsArr, myCurLevelsArr[lvl], selfPop,
otherFloatItemsArr, otherLevelsArr[lvl], otherPop,
- workbuf, worklevels[lvl]);
+ workBuf, workLevels[lvl]);
}
}
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
index 2b0c15e..4fcee5e 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
@@ -25,6 +25,7 @@
import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
+import java.util.Arrays;
import java.util.Objects;
import org.apache.datasketches.common.ArrayOfItemsSerDe;
@@ -35,7 +36,7 @@
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.quantilescommon.FloatsSortedView;
+import org.apache.datasketches.quantilescommon.FloatsSketchSortedView;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesFloatsAPI;
import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator;
@@ -46,7 +47,7 @@
* @see org.apache.datasketches.kll.KllSketch
*/
public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloatsAPI {
- private KllFloatsSketchSortedView kllFloatsSV = null;
+ private FloatsSketchSortedView floatsSV = null;
final static int ITEM_BYTES = Float.BYTES;
KllFloatsSketch(
@@ -171,21 +172,21 @@
public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
refreshSortedView();
- return kllFloatsSV.getCDF(splitPoints, searchCrit);
+ return floatsSV.getCDF(splitPoints, searchCrit);
}
@Override
public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
refreshSortedView();
- return kllFloatsSV.getPMF(splitPoints, searchCrit);
+ return floatsSV.getPMF(splitPoints, searchCrit);
}
@Override
public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
refreshSortedView();
- return kllFloatsSV.getQuantile(rank, searchCrit);
+ return floatsSV.getQuantile(rank, searchCrit);
}
@Override
@@ -195,7 +196,7 @@
final int len = ranks.length;
final float[] quantiles = new float[len];
for (int i = 0; i < len; i++) {
- quantiles[i] = kllFloatsSV.getQuantile(ranks[i], searchCrit);
+ quantiles[i] = floatsSV.getQuantile(ranks[i], searchCrit);
}
return quantiles;
}
@@ -224,7 +225,7 @@
public double getRank(final float quantile, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
refreshSortedView();
- return kllFloatsSV.getRank(quantile, searchCrit);
+ return floatsSV.getRank(quantile, searchCrit);
}
/**
@@ -254,20 +255,12 @@
final int len = quantiles.length;
final double[] ranks = new double[len];
for (int i = 0; i < len; i++) {
- ranks[i] = kllFloatsSV.getRank(quantiles[i], searchCrit);
+ ranks[i] = floatsSV.getRank(quantiles[i], searchCrit);
}
return ranks;
}
@Override
- @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "OK in this case.")
- public FloatsSortedView getSortedView() {
- if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
- refreshSortedView();
- return kllFloatsSV;
- }
-
- @Override
public QuantilesFloatsSketchIterator iterator() {
return new KllFloatsSketchIterator(
getFloatItemsArray(), getLevelsArray(SketchStructure.UPDATABLE), getNumLevels());
@@ -280,7 +273,7 @@
final KllFloatsSketch othFltSk = (KllFloatsSketch)other;
if (othFltSk.isEmpty()) { return; }
KllFloatsHelper.mergeFloatImpl(this, othFltSk);
- kllFloatsSV = null;
+ floatsSV = null;
}
/**
@@ -299,7 +292,7 @@
setMinItem(Float.NaN);
setMaxItem(Float.NaN);
setFloatItemsArray(new float[k]);
- kllFloatsSV = null;
+ floatsSV = null;
}
@Override
@@ -318,14 +311,49 @@
return KllHelper.toStringImpl(sketch, withLevels, withLevelsAndItems, getSerDe());
}
+ //SINGLE UPDATE
+
@Override
public void update(final float item) {
if (Float.isNaN(item)) { return; } //ignore
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
- KllFloatsHelper.updateFloat(this, item);
- kllFloatsSV = null;
+ updateFloat(this, item);
+ floatsSV = null;
}
+ //Also Called from KllFloatsHelper::merge
+ static void updateFloat(final KllFloatsSketch fltSk, final float item) {
+ fltSk.updateMinMax(item);
+ int freeSpace = fltSk.levelsArr[0];
+ assert (freeSpace >= 0);
+ if (freeSpace == 0) {
+ KllFloatsHelper.compressWhileUpdatingSketch(fltSk);
+ freeSpace = fltSk.levelsArr[0];
+ assert (freeSpace > 0);
+ }
+ fltSk.incN(1);
+ fltSk.setLevelZeroSorted(false);
+ final int nextPos = freeSpace - 1;
+ fltSk.setLevelsArrayAt(0, nextPos);
+ fltSk.setFloatItemsArrayAt(nextPos, item);
+ }
+
+ /**
+ * Single update of min and max
+ * @param item the source item, it must not be a NaN.
+ */
+ final void updateMinMax(final float item) {
+ if (isEmpty() || Float.isNaN(getMinItemInternal())) {
+ setMinItem(item);
+ setMaxItem(item);
+ } else {
+ setMinItem(min(getMinItemInternal(), item));
+ setMaxItem(max(getMaxItemInternal(), item));
+ }
+ }
+
+ //WEIGHTED UPDATE
+
/**
* Weighted update. Updates this sketch with the given item the number of times specified by the given integer weight.
* @param item the item to be repeated. NaNs are ignored.
@@ -335,12 +363,97 @@
if (Float.isNaN(item)) { return; } //ignore
if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
if (weight < 1L) { throw new SketchesArgumentException("Weight is less than one."); }
- if (weight == 1L) { KllFloatsHelper.updateFloat(this, item); }
- else { KllFloatsHelper.updateFloat(this, item, weight); }
- kllFloatsSV = null;
+ if (weight == 1L) { updateFloat(this, item); }
+ else {
+ if (weight < levelsArr[0]) {
+ for (int i = 0; i < (int)weight; i++) { updateFloat(this, item); }
+ } else {
+ final KllHeapFloatsSketch tmpSk = new KllHeapFloatsSketch(getK(), DEFAULT_M, item, weight);
+ merge(tmpSk);
+ }
+ }
+ floatsSV = null;
}
- //restricted
+ // VECTOR UPDATE
+
+ /**
+ * Vector update. Updates this sketch with the given array (vector) of items, starting at the items
+ * offset for a length number of items. This is not supported for direct sketches.
+ * @param items the vector of items
+ * @param offset the starting index of the items[] array
+ * @param length the number of items
+ */
+ public void update(final float[] items, final int offset, final int length) {
+ if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+ if (length == 0) { return; }
+ if (!hasNaN(items, offset, length)) {
+ updateFloat(items, offset, length); //fast path
+ floatsSV = null;
+ return;
+ }
+ //has at least one NaN
+ final int end = offset + length;
+ for (int i = offset; i < end; i++) {
+ final float v = items[i];
+ if (!Float.isNaN(v)) {
+ updateFloat(this, v); //normal path
+ floatsSV = null;
+ }
+ }
+ }
+
+ // No NaNs are allowed at this point
+ private void updateFloat(final float[] srcItems, final int srcOffset, final int length) {
+ if (isEmpty() || Float.isNaN(getMinItemInternal())) {
+ setMinItem(srcItems[srcOffset]); //initialize with a real value
+ setMaxItem(srcItems[srcOffset]);
+ }
+
+ int count = 0;
+ while (count < length) {
+ if (levelsArr[0] == 0) {
+ KllFloatsHelper.compressWhileUpdatingSketch(this);
+ }
+ final int spaceNeeded = length - count;
+ final int freeSpace = levelsArr[0];
+ assert (freeSpace > 0);
+ final int numItemsToCopy = min(spaceNeeded, freeSpace);
+ final int dstOffset = freeSpace - numItemsToCopy;
+ final int localSrcOffset = srcOffset + count;
+ setFloatItemsArrayAt(dstOffset, srcItems, localSrcOffset, numItemsToCopy);
+ updateMinMax(srcItems, localSrcOffset, numItemsToCopy);
+ count += numItemsToCopy;
+ incN(numItemsToCopy);
+ setLevelsArrayAt(0, dstOffset);
+ }
+ setLevelZeroSorted(false);
+ }
+
+ /**
+ * Vector update of min and max.
+ * @param srcItems the input source array of values, no NaNs allowed.
+ * @param srcOffset the starting offset in srcItems
+ * @param length the number of items to update min and max
+ */
+ private void updateMinMax(final float[] srcItems, final int srcOffset, final int length) {
+ final int end = srcOffset + length;
+ for (int i = srcOffset; i < end; i++) {
+ setMinItem(min(getMinItemInternal(), srcItems[i]));
+ setMaxItem(max(getMaxItemInternal(), srcItems[i]));
+ }
+ }
+
+ // this returns on the first detected NaN.
+ private static boolean hasNaN(final float[] items, final int offset, final int length) {
+ final int end = offset + length;
+ for (int i = offset; i < end; i++) {
+ if (Float.isNaN(items[i])) { return true; }
+ }
+ return false;
+ }
+
+ // END ALL UPDATE METHODS
/**
* @return full size of internal items array including empty space at bottom.
@@ -354,6 +467,16 @@
abstract float getFloatSingleItem();
+ // Min & Max Methods
+
+ abstract float getMaxItemInternal();
+
+ abstract void setMaxItem(float item);
+
+ abstract float getMinItemInternal();
+
+ abstract void setMinItem(float item);
+
@Override
abstract byte[] getMinMaxByteArr();
@@ -362,6 +485,8 @@
return Float.BYTES * 2;
}
+ //END Min & Max Methods
+
@Override
abstract byte[] getRetainedItemsByteArr();
@@ -393,27 +518,152 @@
return levelsArr[getNumLevels()] * Float.BYTES;
}
- private final void refreshSortedView() {
- kllFloatsSV = (kllFloatsSV == null)
- ? new KllFloatsSketchSortedView(this) : kllFloatsSV;
- }
-
abstract void setFloatItemsArray(float[] floatItems);
abstract void setFloatItemsArrayAt(int index, float item);
- abstract void setMaxItem(float item);
+ abstract void setFloatItemsArrayAt(int dstIndex, float[] srcItems, int srcOffset, int length);
- abstract void setMinItem(float item);
+ // SORTED VIEW
- void updateMinMax(final float item) {
- if (isEmpty()) {
- setMinItem(item);
- setMaxItem(item);
- } else {
- setMinItem(min(getMinItem(), item));
- setMaxItem(max(getMaxItem(), item));
+ @Override
+ @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "OK in this case.")
+ public FloatsSketchSortedView getSortedView() {
+ refreshSortedView();
+ return floatsSV;
+ }
+
+ private final FloatsSketchSortedView refreshSortedView() {
+ if (floatsSV == null) {
+ final CreateSortedView csv = new CreateSortedView();
+ floatsSV = csv.getSV();
+ }
+ return floatsSV;
+ }
+
+ private final class CreateSortedView {
+ float[] quantiles;
+ long[] cumWeights;
+
+ FloatsSketchSortedView getSV() {
+ if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+ final float[] srcQuantiles = getFloatItemsArray();
+ final int[] srcLevels = levelsArr;
+ final int srcNumLevels = getNumLevels();
+
+ if (!isLevelZeroSorted()) {
+ Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
+ if (!hasMemory()) { setLevelZeroSorted(true); }
+ //we don't sort level0 in Memory, only our copy.
+ }
+ final int numQuantiles = getNumRetained();
+ quantiles = new float[numQuantiles];
+ cumWeights = new long[numQuantiles];
+ populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles);
+ return new FloatsSketchSortedView(
+ quantiles, cumWeights, getN(), getMaxItemInternal(), getMinItemInternal());
+ }
+
+ private void populateFromSketch(final float[] srcQuantiles, final int[] srcLevels,
+ final int srcNumLevels, final int numItems) {
+ final int[] myLevels = new int[srcNumLevels + 1];
+ final int offset = srcLevels[0];
+ System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems);
+ int srcLevel = 0;
+ int dstLevel = 0;
+ long weight = 1;
+ while (srcLevel < srcNumLevels) {
+ final int fromIndex = srcLevels[srcLevel] - offset;
+ final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive
+ if (fromIndex < toIndex) { // if equal, skip empty level
+ Arrays.fill(cumWeights, fromIndex, toIndex, weight);
+ myLevels[dstLevel] = fromIndex;
+ myLevels[dstLevel + 1] = toIndex;
+ dstLevel++;
+ }
+ srcLevel++;
+ weight *= 2;
+ }
+ final int numLevels = dstLevel;
+ blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels); //create unit weights
+ KllHelper.convertToCumulative(cumWeights);
+ }
+ } //End of class CreateSortedView
+
+ private static void blockyTandemMergeSort(final float[] quantiles, final long[] weights,
+ final int[] levels, final int numLevels) {
+ if (numLevels == 1) { return; }
+
+ // duplicate the input in preparation for the "ping-pong" copy reduction strategy.
+ final float[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length);
+ final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one
+
+ blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels);
+ }
+
+ private static void blockyTandemMergeSortRecursion(
+ final float[] quantilesSrc, final long[] weightsSrc,
+ final float[] quantilesDst, final long[] weightsDst,
+ final int[] levels, final int startingLevel, final int numLevels) {
+ if (numLevels == 1) { return; }
+ final int numLevels1 = numLevels / 2;
+ final int numLevels2 = numLevels - numLevels1;
+ assert numLevels1 >= 1;
+ assert numLevels2 >= numLevels1;
+ final int startingLevel1 = startingLevel;
+ final int startingLevel2 = startingLevel + numLevels1;
+ // swap roles of src and dst
+ blockyTandemMergeSortRecursion(
+ quantilesDst, weightsDst,
+ quantilesSrc, weightsSrc,
+ levels, startingLevel1, numLevels1);
+ blockyTandemMergeSortRecursion(
+ quantilesDst, weightsDst,
+ quantilesSrc, weightsSrc,
+ levels, startingLevel2, numLevels2);
+ tandemMerge(
+ quantilesSrc, weightsSrc,
+ quantilesDst, weightsDst,
+ levels,
+ startingLevel1, numLevels1,
+ startingLevel2, numLevels2);
+ }
+
+ private static void tandemMerge(
+ final float[] quantilesSrc, final long[] weightsSrc,
+ final float[] quantilesDst, final long[] weightsDst,
+ final int[] levelStarts,
+ final int startingLevel1, final int numLevels1,
+ final int startingLevel2, final int numLevels2) {
+ final int fromIndex1 = levelStarts[startingLevel1];
+ final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive
+ final int fromIndex2 = levelStarts[startingLevel2];
+ final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive
+ int iSrc1 = fromIndex1;
+ int iSrc2 = fromIndex2;
+ int iDst = fromIndex1;
+
+ while (iSrc1 < toIndex1 && iSrc2 < toIndex2) {
+ if (quantilesSrc[iSrc1] < quantilesSrc[iSrc2]) {
+ quantilesDst[iDst] = quantilesSrc[iSrc1];
+ weightsDst[iDst] = weightsSrc[iSrc1];
+ iSrc1++;
+ } else {
+ quantilesDst[iDst] = quantilesSrc[iSrc2];
+ weightsDst[iDst] = weightsSrc[iSrc2];
+ iSrc2++;
+ }
+ iDst++;
+ }
+ if (iSrc1 < toIndex1) {
+ System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1);
+ System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1);
+ } else if (iSrc2 < toIndex2) {
+ System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2);
+ System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2);
}
}
+ // END SORTED VIEW
+
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
deleted file mode 100644
index 52320dd..0000000
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.kll;
-
-import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
-import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
-import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank;
-
-import java.util.Arrays;
-
-import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.quantilescommon.FloatsSortedView;
-import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator;
-import org.apache.datasketches.quantilescommon.InequalitySearch;
-import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
-import org.apache.datasketches.quantilescommon.QuantilesUtil;
-
-/**
- * The SortedView of the KllFloatsSketch.
- * @author Alexander Saydakov
- * @author Lee Rhodes
- */
-public final class KllFloatsSketchSortedView implements FloatsSortedView {
- private final float[] quantiles;
- private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights
- private final long totalN;
- private final float maxItem;
- private final float minItem;
-
- /**
- * Construct from elements for testing.
- * @param quantiles sorted array of quantiles
- * @param cumWeights sorted, monotonically increasing cumulative weights.
- * @param totalN the total number of items presented to the sketch.
- */
- KllFloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN,
- final float maxItem, final float minItem) {
- this.quantiles = quantiles;
- this.cumWeights = cumWeights;
- this.totalN = totalN;
- this.maxItem = maxItem;
- this.minItem = minItem;
- }
-
- /**
- * Constructs this Sorted View given the sketch
- * @param sketch the given KllFloatsSketch.
- */
- public KllFloatsSketchSortedView(final KllFloatsSketch sketch) {
- if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
- this.totalN = sketch.getN();
- this.maxItem = sketch.getMaxItem();
- this.minItem = sketch.getMinItem();
- final float[] srcQuantiles = sketch.getFloatItemsArray();
- final int[] srcLevels = sketch.levelsArr;
- final int srcNumLevels = sketch.getNumLevels();
-
- if (!sketch.isLevelZeroSorted()) {
- Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
- if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); }
- }
-
- final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove free space
- quantiles = new float[numQuantiles];
- cumWeights = new long[numQuantiles];
- populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles);
- }
-
- @Override
- public long[] getCumulativeWeights() {
- return cumWeights.clone();
- }
-
- @Override
- public float getMaxItem() {
- return maxItem;
- }
-
- @Override
- public float getMinItem() {
- return minItem;
- }
-
- @Override
- public long getN() {
- return totalN;
- }
-
- @Override
- public int getNumRetained() {
- return quantiles.length;
- }
-
- @Override
- public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
- if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
- QuantilesUtil.checkNormalizedRankBounds(rank);
- final int len = cumWeights.length;
- final double naturalRank = getNaturalRank(rank, totalN, searchCrit);
- final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
- final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit);
- if (index == -1) {
- return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
- }
- return quantiles[index];
- }
-
- @Override
- public float[] getQuantiles() {
- return quantiles.clone();
- }
-
- @Override
- public double getRank(final float quantile, final QuantileSearchCriteria searchCrit) {
- if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
- final int len = quantiles.length;
- final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT;
- final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit);
- if (index == -1) {
- return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile
- }
- return (double)cumWeights[index] / totalN;
- }
-
- @Override
- public boolean isEmpty() {
- return totalN == 0;
- }
-
- @Override
- public FloatsSortedViewIterator iterator() {
- return new FloatsSortedViewIterator(quantiles, cumWeights);
- }
-
- //restricted methods
-
- private void populateFromSketch(final float[] srcQuantiles, final int[] srcLevels,
- final int srcNumLevels, final int numItems) {
- final int[] myLevels = new int[srcNumLevels + 1];
- final int offset = srcLevels[0];
- System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems);
- int srcLevel = 0;
- int dstLevel = 0;
- long weight = 1;
- while (srcLevel < srcNumLevels) {
- final int fromIndex = srcLevels[srcLevel] - offset;
- final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive
- if (fromIndex < toIndex) { // if equal, skip empty level
- Arrays.fill(cumWeights, fromIndex, toIndex, weight);
- myLevels[dstLevel] = fromIndex;
- myLevels[dstLevel + 1] = toIndex;
- dstLevel++;
- }
- srcLevel++;
- weight *= 2;
- }
- final int numLevels = dstLevel;
- blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels); //create unit weights
- KllHelper.convertToCumulative(cumWeights);
- }
-
- private static void blockyTandemMergeSort(final float[] quantiles, final long[] weights,
- final int[] levels, final int numLevels) {
- if (numLevels == 1) { return; }
-
- // duplicate the input in preparation for the "ping-pong" copy reduction strategy.
- final float[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length);
- final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here
-
- blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels);
- }
-
- private static void blockyTandemMergeSortRecursion(
- final float[] quantilesSrc, final long[] weightsSrc,
- final float[] quantilesDst, final long[] weightsDst,
- final int[] levels, final int startingLevel, final int numLevels) {
- if (numLevels == 1) { return; }
- final int numLevels1 = numLevels / 2;
- final int numLevels2 = numLevels - numLevels1;
- assert numLevels1 >= 1;
- assert numLevels2 >= numLevels1;
- final int startingLevel1 = startingLevel;
- final int startingLevel2 = startingLevel + numLevels1;
- // swap roles of src and dst
- blockyTandemMergeSortRecursion(
- quantilesDst, weightsDst,
- quantilesSrc, weightsSrc,
- levels, startingLevel1, numLevels1);
- blockyTandemMergeSortRecursion(
- quantilesDst, weightsDst,
- quantilesSrc, weightsSrc,
- levels, startingLevel2, numLevels2);
- tandemMerge(
- quantilesSrc, weightsSrc,
- quantilesDst, weightsDst,
- levels,
- startingLevel1, numLevels1,
- startingLevel2, numLevels2);
- }
-
- private static void tandemMerge(
- final float[] quantilesSrc, final long[] weightsSrc,
- final float[] quantilesDst, final long[] weightsDst,
- final int[] levelStarts,
- final int startingLevel1, final int numLevels1,
- final int startingLevel2, final int numLevels2) {
- final int fromIndex1 = levelStarts[startingLevel1];
- final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive
- final int fromIndex2 = levelStarts[startingLevel2];
- final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive
- int iSrc1 = fromIndex1;
- int iSrc2 = fromIndex2;
- int iDst = fromIndex1;
-
- while (iSrc1 < toIndex1 && iSrc2 < toIndex2) {
- if (quantilesSrc[iSrc1] < quantilesSrc[iSrc2]) {
- quantilesDst[iDst] = quantilesSrc[iSrc1];
- weightsDst[iDst] = weightsSrc[iSrc1];
- iSrc1++;
- } else {
- quantilesDst[iDst] = quantilesSrc[iSrc2];
- weightsDst[iDst] = weightsSrc[iSrc2];
- iSrc2++;
- }
- iDst++;
- }
- if (iSrc1 < toIndex1) {
- System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1);
- System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1);
- } else if (iSrc2 < toIndex2) {
- System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2);
- System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2);
- }
- }
-
-}
diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
index cafefde..cc192b7 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
@@ -163,12 +163,17 @@
@Override
String getItemAsString(final int index) {
if (isEmpty()) { return "NaN"; }
- return Double.toString(floatItems[index]);
+ return Float.toString(floatItems[index]);
}
@Override
public int getK() { return k; }
+ //MinMax Methods
+
+ @Override
+ float getMaxItemInternal() { return maxFloatItem; }
+
@Override
public float getMaxItem() {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
@@ -177,26 +182,43 @@
@Override
String getMaxItemAsString() {
- if (isEmpty()) { return "NaN"; }
return Float.toString(maxFloatItem);
}
@Override
+ float getMinItemInternal() { return minFloatItem; }
+
+ @Override
public float getMinItem() {
- if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+ if (isEmpty() || Float.isNaN(minFloatItem)) { throw new SketchesArgumentException(EMPTY_MSG); }
return minFloatItem;
}
@Override
String getMinItemAsString() {
- if (isEmpty()) { return "NaN"; }
return Float.toString(minFloatItem);
}
@Override
+ byte[] getMinMaxByteArr() {
+ final byte[] bytesOut = new byte[2 * Float.BYTES];
+ putFloatLE(bytesOut, 0, minFloatItem);
+ putFloatLE(bytesOut, Float.BYTES, maxFloatItem);
+ return bytesOut;
+ }
+
+ @Override
+ void setMaxItem(final float item) { this.maxFloatItem = item; }
+
+ @Override
+ void setMinItem(final float item) { this.minFloatItem = item; }
+
+ //END MinMax Methods
+
+ @Override
public long getN() { return n; }
- //restricted
+ //other restricted
@Override
float[] getFloatItemsArray() { return floatItems; }
@@ -217,14 +239,6 @@
int getMinK() { return minK; }
@Override
- byte[] getMinMaxByteArr() {
- final byte[] bytesOut = new byte[2 * Float.BYTES];
- putFloatLE(bytesOut, 0, minFloatItem);
- putFloatLE(bytesOut, Float.BYTES, maxFloatItem);
- return bytesOut;
- }
-
- @Override
byte[] getRetainedItemsByteArr() {
if (isEmpty()) { return new byte[0]; }
final byte[] bytesOut;
@@ -272,15 +286,14 @@
void setFloatItemsArrayAt(final int index, final float item) { this.floatItems[index] = item; }
@Override
+ void setFloatItemsArrayAt(final int dstIndex, final float[] srcItems, final int srcOffset, final int length) { //TODO
+ System.arraycopy(srcItems, srcOffset, floatItems, dstIndex, length);
+ }
+
+ @Override
void setLevelZeroSorted(final boolean sorted) { this.isLevelZeroSorted = sorted; }
@Override
- void setMaxItem(final float item) { this.maxFloatItem = item; }
-
- @Override
- void setMinItem(final float item) { this.minFloatItem = item; }
-
- @Override
void setMinK(final int minK) { this.minK = minK; }
@Override
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java
index 8930085..564f8aa 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java
@@ -32,7 +32,7 @@
*/
public final class DoublesSketchSortedView implements DoublesSortedView {
private final double[] quantiles;
- private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights
+ private final long[] cumWeights; //cumulative natural weights
private final long totalN;
private final double maxItem;
private final double minItem;
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSketchSortedView.java
new file mode 100644
index 0000000..fbcfc88
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSketchSortedView.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.quantilescommon;
+
+import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
+import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * The SortedView of the Quantiles Classic DoublesSketch and the KllDoublesSketch.
+ * @author Alexander Saydakov
+ * @author Lee Rhodes
+ */
+public class FloatsSketchSortedView implements FloatsSortedView {
+ private final float[] quantiles;
+ private final long[] cumWeights; //cumulative natural weights
+ private final long totalN;
+ private final float maxItem;
+ private final float minItem;
+
+ /**
+ * Construct from elements, also used in testing.
+ * @param quantiles sorted array of quantiles
+ * @param cumWeights sorted, monotonically increasing cumulative weights.
+ * @param totalN the total number of items presented to the sketch.
+ * @param maxItem of type double
+ * @param minItem of type double
+ */
+ public FloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN,
+ final float maxItem, final float minItem) {
+ this.quantiles = quantiles;
+ this.cumWeights = cumWeights;
+ this.totalN = totalN;
+ this.maxItem = maxItem;
+ this.minItem = minItem;
+ }
+
+ @Override
+ public long[] getCumulativeWeights() {
+ return cumWeights.clone();
+ }
+
+ @Override
+ public float getMaxItem() {
+ return maxItem;
+ }
+
+ @Override
+ public float getMinItem() {
+ return minItem;
+ }
+
+ @Override
+ public long getN() {
+ return totalN;
+ }
+
+ @Override
+ public int getNumRetained() {
+ return quantiles.length;
+ }
+
+ @Override
+ public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
+ if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+ QuantilesUtil.checkNormalizedRankBounds(rank);
+ final int len = cumWeights.length;
+ final double naturalRank = getNaturalRank(rank, totalN, searchCrit);
+ final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
+ final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit);
+ if (index == -1) {
+ return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
+ }
+ return quantiles[index];
+ }
+
+ @Override
+ public float[] getQuantiles() {
+ return quantiles.clone();
+ }
+
+ @Override
+ public double getRank(final float quantile, final QuantileSearchCriteria searchCrit) {
+ if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+ final int len = quantiles.length;
+ final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT;
+ final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit);
+ if (index == -1) {
+ return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile
+ }
+ return (double)cumWeights[index] / totalN;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return totalN == 0;
+ }
+
+ @Override
+ public FloatsSortedViewIterator iterator() {
+ return new FloatsSortedViewIterator(quantiles, cumWeights);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
index db0a612..5fbcf43 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
@@ -40,7 +40,7 @@
public class ItemsSketchSortedView<T> implements GenericSortedView<T> {
private static final double PARTITIONING_ERROR_FACTOR = 2.0;
private final T[] quantiles;
- private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights
+ private final long[] cumWeights; //cumulative natural weights
private final long totalN;
private final Comparator<? super T> comparator;
private final T maxItem;
diff --git a/src/test/java/org/apache/datasketches/common/TestUtil.java b/src/test/java/org/apache/datasketches/common/TestUtil.java
index 6f837ac..7aab817 100644
--- a/src/test/java/org/apache/datasketches/common/TestUtil.java
+++ b/src/test/java/org/apache/datasketches/common/TestUtil.java
@@ -77,7 +77,7 @@
public static File getResourceFile(final String shortFileName) {
Objects.requireNonNull(shortFileName, "input parameter 'String shortFileName' cannot be null.");
final String slashName = (shortFileName.charAt(0) == '/') ? shortFileName : '/' + shortFileName;
- final URL url = Util.class.getResource(slashName);
+ final URL url = TestUtil.class.getResource(slashName);
Objects.requireNonNull(url, "resource " + slashName + " returns null URL.");
File file;
file = createTempFile(slashName);
@@ -110,7 +110,7 @@
public static byte[] getResourceBytes(final String shortFileName) {
Objects.requireNonNull(shortFileName, "input parameter 'String shortFileName' cannot be null.");
final String slashName = (shortFileName.charAt(0) == '/') ? shortFileName : '/' + shortFileName;
- final URL url = Util.class.getResource(slashName);
+ final URL url = TestUtil.class.getResource(slashName);
Objects.requireNonNull(url, "resource " + slashName + " returns null URL.");
final byte[] out;
if (url.getProtocol().equals("jar")) { //definitely a jar
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
index dcc80d6..3d5b316 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
@@ -638,14 +638,6 @@
}
@Test
- public void checkMergeExceptionsWrongType() {
- KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20);
- KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20);
- try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { }
- try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { }
- }
-
- @Test
public void checkVectorUpdate() {
WritableMemory dstMem = WritableMemory.allocate(6000);
KllDoublesSketch sk = KllDoublesSketch.newDirectInstance(20, dstMem, memReqSvr);
@@ -673,6 +665,14 @@
return ddsk;
}
+ @Test
+ public void checkMergeExceptionsWrongType() {
+ KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20);
+ KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20);
+ try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { }
+ try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { }
+ }
+
private final static boolean enablePrinting = false;
/**
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java
index 8ab61ed..f4f716e 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java
@@ -268,7 +268,8 @@
public void mergeMinAndMaxFromOther() {
final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0);
final KllFloatsSketch sketch2 = getUpdatableDirectFloatSketch(200, 0);
- for (int i = 1; i <= 1_000_000; i++) {
+ int n = 1_000_000;
+ for (int i = 1; i <= n; i++) {
sketch1.update(i);
}
sketch2.merge(sketch1);
@@ -325,7 +326,7 @@
@Test
public void serializeDeserializeEmptyViaUpdatableWritableWrap() {
final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0);
- final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable
+ final byte[] bytes = KllHelper.toByteArray(sketch1, true);
final KllFloatsSketch sketch2 =
KllFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr);
assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true));
@@ -343,7 +344,7 @@
public void serializeDeserializeOneValueViaCompactHeapify() {
final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0);
sketch1.update(1);
- final byte[] bytes = sketch1.toByteArray(); //compact
+ final byte[] bytes = sketch1.toByteArray();
final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false));
assertFalse(sketch2.isEmpty());
@@ -359,7 +360,7 @@
public void serializeDeserializeOneValueViaUpdatableWritableWrap() {
final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0);
sketch1.update(1);
- final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable
+ final byte[] bytes = KllHelper.toByteArray(sketch1, true);
final KllFloatsSketch sketch2 =
KllFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr);
assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true));
@@ -637,14 +638,25 @@
}
@Test
- public void checkMergeExceptionsWrongType() {
- KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20);
- KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20);
- try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { }
- try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { }
+ public void checkVectorUpdate() {
+ WritableMemory dstMem = WritableMemory.allocate(6000);
+ KllFloatsSketch sk = KllFloatsSketch.newDirectInstance(20, dstMem, memReqSvr);
+ float[] v = new float[21];
+ for (int i = 0; i < 21; i++) { v[i] = i + 1; }
+ sk.update(v, 0, 21);
}
- private static KllFloatsSketch getUpdatableDirectFloatSketch(final int k, final int n) {
+ @Test
+ public void checkWeightedUpdate() {
+ WritableMemory dstMem = WritableMemory.allocate(6000);
+ KllFloatsSketch sk = KllFloatsSketch.newDirectInstance(8, dstMem, memReqSvr);
+ for (int i = 0; i < 16; i++) {
+ sk.update(i + 1, 16);
+ }
+ println(sk.toString(true, true));
+ }
+
+ private static KllFloatsSketch getUpdatableDirectFloatSketch(int k, int n) {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(k);
for (int i = 1; i <= n; i++) { sk.update(i); }
byte[] byteArr = KllHelper.toByteArray(sk, true);
@@ -653,6 +665,14 @@
return dfsk;
}
+ @Test
+ public void checkMergeExceptionsWrongType() {
+ KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20);
+ KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20);
+ try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { }
+ try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { }
+ }
+
private final static boolean enablePrinting = false;
/**
diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java
index 3bbb44b..fe745bc 100644
--- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java
@@ -32,8 +32,10 @@
@Test
public void serializeDeserializeEmpty() {
- final KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance();
- //from heap -> byte[] -> heap
+ final int N = 20;
+
+ final KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(N);
+ //Empty: from heap -> byte[] -> heap
final byte[] bytes = sk1.toByteArray();
final KllFloatsSketch sk2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
assertEquals(bytes.length, sk1.getSerializedSizeBytes());
@@ -44,7 +46,8 @@
try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {}
try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {}
assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes());
- //from heap -> byte[] -> off heap
+
+ //Empty: from heap -> byte[] -> off heap
final KllFloatsSketch sk3 = KllFloatsSketch.wrap(Memory.wrap(bytes));
assertTrue(sk3.isEmpty());
assertEquals(sk3.getNumRetained(), sk1.getNumRetained());
@@ -62,6 +65,7 @@
public void serializeDeserializeOneValue() {
final KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance();
sk1.update(1);
+
//from heap -> byte[] -> heap
final byte[] bytes = sk1.toByteArray();
final KllFloatsSketch sk2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
@@ -73,6 +77,7 @@
assertEquals(sk2.getMinItem(), 1.0F);
assertEquals(sk2.getMaxItem(), 1.0F);
assertEquals(sk2.getSerializedSizeBytes(), Long.BYTES + Float.BYTES);
+
//from heap -> byte[] -> off heap
final KllFloatsSketch sk3 = KllFloatsSketch.wrap(Memory.wrap(bytes));
assertFalse(sk3.isEmpty());
@@ -96,6 +101,7 @@
}
assertEquals(sk1.getMinItem(), 0.0f);
assertEquals(sk1.getMaxItem(), 999.0f);
+
//from heap -> byte[] -> heap
final byte[] bytes = sk1.toByteArray();
final KllFloatsSketch sk2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
@@ -107,6 +113,7 @@
assertEquals(sk2.getMinItem(), sk1.getMinItem());
assertEquals(sk2.getMaxItem(), sk1.getMaxItem());
assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes());
+
//from heap -> byte[] -> off heap
final KllFloatsSketch sk3 = KllFloatsSketch.wrap(Memory.wrap(bytes));
assertFalse(sk3.isEmpty());
diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
index 243dd83..652bd51 100644
--- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.kll;
+import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
@@ -37,6 +38,7 @@
import org.testng.annotations.Test;
public class KllFloatsSketchTest {
+ private static final String LS = System.getProperty("line.separator");
private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8
private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128
private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256
@@ -608,6 +610,101 @@
try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { }
}
+ @Test
+ public void checkVectorUpdate() {
+ boolean withLevels = false;
+ boolean withLevelsAndItems = true;
+ int k = 20;
+ int n = 108;//108;
+ int maxVsz = 40; //max vector size
+ KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(k);
+ int j = 1;
+ int rem;
+ while ((rem = n - j + 1) > 0) {
+ int vecSz = min(rem, maxVsz);
+ float[] v = new float[vecSz];
+ for (int i = 0; i < vecSz; i++) { v[i] = j++; }
+ sk.update(v, 0, vecSz);
+ }
+ println(LS + "#<<< END STATE # >>>");
+ println(sk.toString(withLevels, withLevelsAndItems));
+ println("");
+ }
+
+ @Test
+ public void vectorizedUpdates() {
+ final int trials = 1;
+ final int M = 1; //number of vectors
+ final int N = 1000; //vector size
+ final int K = 256;
+ final float[] values = new float[N];
+ float vIn = 1.0F;
+ long totN = 0;
+ final long startTime = System.nanoTime();
+ for (int t = 0; t < trials; t++) {
+ final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(K);
+ for (int m = 0; m < M; m++) {
+ for (int n = 0; n < N; n++) {
+ values[n] = vIn++; //fill vector
+ }
+ sketch.update(values, 0, N); //vector input
+ }
+ totN = sketch.getN();
+ assertEquals(totN, M * N);
+ assertEquals(sketch.getMinItem(), 1.0F);
+ assertEquals(sketch.getMaxItem(), totN);
+ assertEquals(sketch.getQuantile(0.5), (float)(totN / 2.0), totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance
+ }
+ final long runTime = System.nanoTime() - startTime;
+ println("Vectorized Updates");
+ printf(" Vector size : %,12d\n", N);
+ printf(" Num Vectors : %,12d\n", M);
+ printf(" Total Input : %,12d\n", totN);
+ printf(" Run Time mS : %,12.3f\n", runTime / 1e6);
+ final double trialTime = runTime / (1e6 * trials);
+ printf(" mS / Trial : %,12.3f\n", trialTime);
+ final double updateTime = runTime / (1.0 * totN * trials);
+ printf(" nS / Update : %,12.3f\n", updateTime);
+ }
+
+ @Test
+ public void nonVectorizedUpdates() {
+ final int trials = 1;
+ final int M = 1; //number of vectors
+ final int N = 1000; //vector size
+ final int K = 256;
+ final float[] values = new float[N];
+ float vIn = 1.0F;
+ long totN = 0;
+ final long startTime = System.nanoTime();
+ for (int t = 0; t < trials; t++) {
+ final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(K);
+ for (int m = 0; m < M; m++) {
+ for (int n = 0; n < N; n++) {
+ values[n] = vIn++; //fill vector
+ }
+ for (int i = 0; i < N; i++) {
+ sketch.update(values[i]); //single item input
+ }
+ }
+ totN = sketch.getN();
+ assertEquals(totN, M * N);
+ assertEquals(sketch.getMinItem(), 1.0);
+ assertEquals(sketch.getMaxItem(), totN);
+ assertEquals(sketch.getQuantile(0.5), (float)(totN / 2.0), totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance
+ }
+ final long runTime = System.nanoTime() - startTime;
+ println("Vectorized Updates");
+ printf(" Vector size : %,12d\n", N);
+ printf(" Num Vectors : %,12d\n", M);
+ printf(" Total Input : %,12d\n", totN);
+ printf(" Run Time mS : %,12.3f\n", runTime / 1e6);
+ final double trialTime = runTime / (1e6 * trials);
+ printf(" mS / Trial : %,12.3f\n", trialTime);
+ final double updateTime = runTime / (1.0 * totN * trials);
+ printf(" nS / Update : %,12.3f\n", updateTime);
+ }
+
private final static boolean enablePrinting = false;
/**
diff --git a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
index 6049f2f..7e3b071 100644
--- a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
+++ b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
@@ -40,7 +40,6 @@
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.kll.KllDoublesSketch;
import org.apache.datasketches.kll.KllFloatsSketch;
-import org.apache.datasketches.kll.KllFloatsSketchSortedView;
import org.apache.datasketches.kll.KllItemsSketch;
import org.apache.datasketches.kll.KllSketch;
import org.apache.datasketches.quantiles.DoublesSketch;
@@ -142,7 +141,7 @@
ItemsSketch<String> itemsSk = null;
ReqSketchSortedView reqFloatsSV = null;
- KllFloatsSketchSortedView kllFloatsSV = null;
+ FloatsSketchSortedView kllFloatsSV = null;
DoublesSketchSortedView kllDoublesSV = null;
DoublesSketchSortedView classicDoublesSV = null;
ItemsSketchSortedView<String> kllItemsSV = null;
@@ -356,10 +355,10 @@
return (ReqSketchSortedView) REQ_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem);
}
- private final static KllFloatsSketchSortedView getRawKllFloatsSV(
+ private final static FloatsSketchSortedView getRawKllFloatsSV(
final float[] values, final long[] cumWeights, final long totalN, final float maxItem, final float minItem)
throws Exception {
- return (KllFloatsSketchSortedView) KLL_FLOATS_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem);
+ return (FloatsSketchSortedView) KLL_FLOATS_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem);
}
private final static DoublesSketchSortedView getRawDoublesSV(
diff --git a/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java b/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java
index 6dbcac2..e105098 100644
--- a/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java
+++ b/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java
@@ -44,7 +44,7 @@
static {
REQ_SV = getClass("org.apache.datasketches.req.ReqSketchSortedView");
- KLL_FLOATS_SV = getClass("org.apache.datasketches.kll.KllFloatsSketchSortedView");
+ KLL_FLOATS_SV = getClass("org.apache.datasketches.quantilescommon.FloatsSketchSortedView");
DOUBLES_SV = getClass("org.apache.datasketches.quantilescommon.DoublesSketchSortedView");
REQ_SV_CTOR =