src/main/java/org/apache/datasketches/quantiles/Util.java - datasketches-java - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.datasketches.quantiles;

 import static java.lang.Math.abs;
 import static java.lang.Math.ceil;
 import static java.lang.Math.exp;
 import static java.lang.Math.log;
 import static java.lang.Math.max;
 import static java.lang.Math.min;
 import static java.lang.Math.pow;
 import static java.lang.Math.round;
 import static org.apache.datasketches.Util.ceilingPowerOf2;
 import static org.apache.datasketches.Util.isPowerOf2;
 import static org.apache.datasketches.quantiles.DoublesSketch.MAX_K;
 import static org.apache.datasketches.quantiles.DoublesSketch.MIN_K;
 import static org.apache.datasketches.quantiles.PreambleUtil.COMPACT_FLAG_MASK;
 import static org.apache.datasketches.quantiles.PreambleUtil.EMPTY_FLAG_MASK;
 import static org.apache.datasketches.quantiles.PreambleUtil.ORDERED_FLAG_MASK;
 import static org.apache.datasketches.quantiles.PreambleUtil.READ_ONLY_FLAG_MASK;
 import static org.apache.datasketches.quantiles.PreambleUtil.extractFlags;

 import org.apache.datasketches.Family;
 import org.apache.datasketches.SketchesArgumentException;
 import org.apache.datasketches.memory.Memory;

 /**
  * Utility class for quantiles sketches.
  *
  * <p>This class contains a highly specialized sort called blockyTandemMergeSort().
  * It also contains methods that are used while building histograms and other common
  * functions.</p>
  *
  * @author Lee Rhodes
  */
 final class Util {

   private Util() {}

   /**
    * The java line separator character as a String.
    */
   static final String LS = System.getProperty("line.separator");

   /**
    * The tab character
    */
   static final char TAB = '\t';

   /**
    * Computes the raw delta area between two quantile sketches for the
    * {@link #kolmogorovSmirnovTest(DoublesSketch, DoublesSketch, double)
    * Kolmogorov-Smirnov Test}
    * method.
    * @param sketch1 Input DoubleSketch 1
    * @param sketch2 Input DoubleSketch 2
    * @return the raw delta area between two quantile sketches
    */
   public static double computeKSDelta(final DoublesSketch sketch1,
       final DoublesSketch sketch2) {
     final DoublesAuxiliary p = new DoublesAuxiliary(sketch1);
     final DoublesAuxiliary q = new DoublesAuxiliary(sketch2);

     final double[] pSamplesArr = p.auxSamplesArr_;
     final double[] qSamplesArr = q.auxSamplesArr_;
     final long[] pCumWtsArr = p.auxCumWtsArr_;
     final long[] qCumWtsArr = q.auxCumWtsArr_;
     final int pSamplesArrLen = pSamplesArr.length;
     final int qSamplesArrLen = qSamplesArr.length;

     final double n1 = sketch1.getN();
     final double n2 = sketch2.getN();

     //Compute D from the two distributions
     double deltaArea = 0.0;
     int i = getNextIndex(pSamplesArr, -1);
     int j = getNextIndex(qSamplesArr, -1);

     // We're done if either array reaches the end
     while ((i < pSamplesArrLen) && (j < qSamplesArrLen)) {
       final double pSample = pSamplesArr[i];
       final double qSample = qSamplesArr[j];
       final long pWt = pCumWtsArr[i];
       final long qWt = qCumWtsArr[j];
       final double pNormWt = pWt / n1;
       final double qNormWt = qWt / n2;
       final double pMinusQ = Math.abs(pNormWt - qNormWt);
       final double curD = deltaArea;
       deltaArea = Math.max(curD, pMinusQ);

       //Increment i or j or both
       if (pSample == qSample) {
         i = getNextIndex(pSamplesArr, i);
         j = getNextIndex(qSamplesArr, j);
       } else if (pSample < qSample) {
         i = getNextIndex(pSamplesArr, i);
       } else {
         j = getNextIndex(qSamplesArr, j);
       }
     }

     //This is D, the delta difference in area of the two distributions
     deltaArea = Math.max(deltaArea, Math.abs((pCumWtsArr[i] / n1) - (qCumWtsArr[j] / n2)));
     return deltaArea;
   }

   /**
    * Computes the adjusted delta area threshold for the
    * {@link #kolmogorovSmirnovTest(DoublesSketch, DoublesSketch, double) Kolmogorov-Smirnov Test}
    * method.
    * This adjusts the computed threshold by the error epsilons of the two given sketches.
    * See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
    * @param sketch1 Input DoubleSketch 1
    * @param sketch2 Input DoubleSketch 2
    * @param tgtPvalue Target p-value. Typically .001 to .1, e.g., .05.
    * @return the adjusted threshold to be compared with the raw delta area.
    */
   public static double computeKSThreshold(final DoublesSketch sketch1,
                                            final DoublesSketch sketch2,
                                            final double tgtPvalue) {
     final double r1 = sketch1.getRetainedItems();
     final double r2 = sketch2.getRetainedItems();
     final double alpha = tgtPvalue;
     final double alphaFactor = Math.sqrt(-0.5 * Math.log(0.5 * alpha));
     final double deltaAreaThreshold = alphaFactor * Math.sqrt((r1 + r2) / (r1 * r2));
     final double eps1 = Util.getNormalizedRankError(sketch1.getK(), false);
     final double eps2 = Util.getNormalizedRankError(sketch2.getK(), false);

     final double adjDeltaAreaThreshold = deltaAreaThreshold + eps1 + eps2;
     return adjDeltaAreaThreshold;
   }

   /**
    * Performs the Kolmogorov-Smirnov Test between two quantiles sketches.
    * Note: if the given sketches have insufficient data or if the sketch sizes are too small,
    * this will return false.
    * @param sketch1 Input DoubleSketch 1
    * @param sketch2 Input DoubleSketch 2
    * @param tgtPvalue Target p-value. Typically .001 to .1, e.g., .05.
    * @return Boolean indicating whether we can reject the null hypothesis (that the sketches
    * reflect the same underlying distribution) using the provided tgtPValue.
    */
   public static boolean kolmogorovSmirnovTest(final DoublesSketch sketch1,
       final DoublesSketch sketch2, final double tgtPvalue) {
     final double delta = computeKSDelta(sketch1, sketch2);
     final double thresh = computeKSThreshold(sketch1, sketch2, tgtPvalue);
     return delta > thresh;
   }

   private static final int getNextIndex(final double[] samplesArr, final int stIdx) {
     int idx = stIdx + 1;
     final int samplesArrLen = samplesArr.length;

     if (idx >= samplesArrLen) { return samplesArrLen; }

     // if we have a sequence of equal values, use the last one of the sequence
     final double val = samplesArr[idx];
     int nxtIdx = idx + 1;
     while ((nxtIdx < samplesArrLen) && (samplesArr[nxtIdx] == val)) {
       idx = nxtIdx;
       ++nxtIdx;
     }
     return idx;
   }

   /**
    * Gets the normalized rank error given k and pmf for the Quantiles DoubleSketch and ItemsSketch.
    * @param k the configuation parameter
    * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
    * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
    * @return if pmf is true, the normalized rank error for the getPMF() function.
    * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
    */
   // constants were derived as the best fit to 99 percentile empirically measured max error in
   // thousands of trials
   public static double getNormalizedRankError(final int k, final boolean pmf) {
     return pmf
         ? 1.854 / pow(k, 0.9657)
         : 1.576 / pow(k, 0.9726);
   }

   /**
    * Gets the approximate value of <em>k</em> to use given epsilon, the normalized rank error
    * for the Quantiles DoubleSketch and ItemsSketch.
    * @param epsilon the normalized rank error between zero and one.
    * @param pmf if true, this function returns the value of <em>k</em> assuming the input epsilon
    * is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function
    * returns the value of <em>k</em> assuming the input epsilon is the desired "single-sided"
    * epsilon for all the other queries.
    * @return the value of <i>k</i> given a value of epsilon.
    */
   // constants were derived as the best fit to 99 percentile empirically measured max error in
   // thousands of trials
   public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
     //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false.
     final double eps = max(epsilon, 6.395E-5);
     final double kdbl = pmf
         ? exp(log(1.854 / eps) / 0.9657)
         : exp(log(1.576 / eps) / 0.9726);
     final double krnd = round(kdbl);
     final double del = abs(krnd - kdbl);
     //round to closest int if within 1 ppm of the int, otherwise use the ceiling.
     final int k = (int) ((del < 1E-6) ? krnd : ceil(kdbl));
     return max(MIN_K, min(MAX_K, k));
   }

   /**
    * Checks the validity of the given value k
    * @param k must be greater than 1 and less than 65536 and a power of 2.
    */
   static void checkK(final int k) {
     if ((k < MIN_K) || (k > MAX_K) || !isPowerOf2(k)) {
       throw new SketchesArgumentException(
           "K must be >= " + MIN_K + " and <= " + MAX_K + " and a power of 2: " + k);
     }
   }

   /**
    * Checks the validity of the given family ID
    * @param familyID the given family ID
    */
   static void checkFamilyID(final int familyID) {
     final Family family = Family.idToFamily(familyID);
     if (!family.equals(Family.QUANTILES)) {
       throw new SketchesArgumentException(
           "Possible corruption: Invalid Family: " + family.toString());
     }
   }

   /**
    * Checks the consistency of the flag bits and the state of preambleLong and the memory
    * capacity and returns the empty state.
    * @param preambleLongs the size of preamble in longs
    * @param flags the flags field
    * @param memCapBytes the memory capacity
    * @return the value of the empty state
    */
   static boolean checkPreLongsFlagsCap(final int preambleLongs, final int flags, final long memCapBytes) {
     final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; //Preamble flags empty state
     final int minPre = Family.QUANTILES.getMinPreLongs(); //1
     final int maxPre = Family.QUANTILES.getMaxPreLongs(); //2
     final boolean valid = ((preambleLongs == minPre) && empty) || ((preambleLongs == maxPre) && !empty);
     if (!valid) {
       throw new SketchesArgumentException(
           "Possible corruption: PreambleLongs inconsistent with empty state: " + preambleLongs);
     }
     checkHeapFlags(flags);
     if (memCapBytes < (preambleLongs << 3)) {
       throw new SketchesArgumentException(
           "Possible corruption: Insufficient capacity for preamble: " + memCapBytes);
     }
     return empty;
   }

   /**
    * Checks just the flags field of the preamble. Allowed flags are Read Only, Empty, Compact, and
    * ordered.
    * @param flags the flags field
    */
   static void checkHeapFlags(final int flags) {  //only used by checkPreLongsFlagsCap and test
     final int allowedFlags =
         READ_ONLY_FLAG_MASK | EMPTY_FLAG_MASK | COMPACT_FLAG_MASK | ORDERED_FLAG_MASK;
     final int flagsMask = ~allowedFlags;
     if ((flags & flagsMask) > 0) {
       throw new SketchesArgumentException(
          "Possible corruption: Invalid flags field: " + Integer.toBinaryString(flags));
     }
   }

   /**
    * Checks just the flags field of an input Memory object. Returns true for a compact
    * sketch, false for an update sketch. Does not perform additional checks, including sketch
    * family.
    * @param srcMem the source Memory containing a sketch
    * @return true if flags indicate a compact sketch, otherwise false
    */
   static boolean checkIsCompactMemory(final Memory srcMem) {
     // only reading so downcast is ok
     final int flags = extractFlags(srcMem);
     final int compactFlags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK;
     return (flags & compactFlags) > 0;
   }

   /**
    * Checks the sequential validity of the given array of double values.
    * They must be unique, monotonically increasing and not NaN.
    * @param values the given array of double values
    */
   static final void checkSplitPointsOrder(final double[] values) {
     if (values == null) {
       throw new SketchesArgumentException("Values cannot be null.");
     }
     final int lenM1 = values.length - 1;
     for (int j = 0; j < lenM1; j++) {
       if (values[j] < values[j + 1]) { continue; }
       throw new SketchesArgumentException(
           "Values must be unique, monotonically increasing and not NaN.");
     }
   }

   /**
    * Checks that the given fractional rank: <i>0 &le; frank &le; 1.0</i>.
    * @param frank the given fractional rank.
    */
   static final void checkFractionalRankBounds(final double frank) {
     if ((frank < 0.0) || (frank > 1.0)) {
       throw new SketchesArgumentException(
         "Fractional rank must be >= 0 and <= 1.0: " + frank);
     }
   }

   /**
    * Returns the number of retained valid items in the sketch given k and n.
    * @param k the given configured k of the sketch
    * @param n the current number of items seen by the sketch
    * @return the number of retained items in the sketch given k and n.
    */
   static int computeRetainedItems(final int k, final long n) {
     final int bbCnt = computeBaseBufferItems(k, n);
     final long bitPattern = computeBitPattern(k, n);
     final int validLevels = computeValidLevels(bitPattern);
     return bbCnt + (validLevels * k);
   }

   /**
    * Returns the total item capacity of an updatable, non-compact combined buffer
    * given <i>k</i> and <i>n</i>.  If total levels = 0, this returns the ceiling power of 2
    * size for the base buffer or the MIN_BASE_BUF_SIZE, whichever is larger.
    *
    * @param k sketch parameter. This determines the accuracy of the sketch and the
    * size of the updatable data structure, which is a function of <i>k</i> and <i>n</i>.
    *
    * @param n The number of items in the input stream
    * @return the current item capacity of the combined buffer
    */
   static int computeCombinedBufferItemCapacity(final int k, final long n) {
     final int totLevels = computeNumLevelsNeeded(k, n);
     if (totLevels == 0) {
       final int bbItems = computeBaseBufferItems(k, n);
       return Math.max(2 * DoublesSketch.MIN_K, ceilingPowerOf2(bbItems));
     }
     return (2 + totLevels) * k;
   }

   /**
    * Computes the number of valid levels above the base buffer
    * @param bitPattern the bit pattern
    * @return the number of valid levels above the base buffer
    */
   static int computeValidLevels(final long bitPattern) {
     return Long.bitCount(bitPattern);
   }

   /**
    * Computes the total number of logarithmic levels above the base buffer given the bitPattern.
    * @param bitPattern the given bit pattern
    * @return the total number of logarithmic levels above the base buffer
    */
   static int computeTotalLevels(final long bitPattern) {
     return hiBitPos(bitPattern) + 1;
   }

   /**
    * Computes the total number of logarithmic levels above the base buffer given k and n.
    * This is equivalent to max(floor(lg(n/k), 0).
    * Returns zero if n is less than 2 * k.
    * @param k the configured size of the sketch
    * @param n the total values presented to the sketch.
    * @return the total number of levels needed.
    */
   static int computeNumLevelsNeeded(final int k, final long n) {
     return 1 + hiBitPos(n / (2L * k));
   }

   /**
    * Computes the number of base buffer items given k, n
    * @param k the configured size of the sketch
    * @param n the total values presented to the sketch
    * @return the number of base buffer items
    */
   static int computeBaseBufferItems(final int k, final long n) {
     return (int) (n % (2L * k));
   }

   /**
    * Computes the levels bit pattern given k, n.
    * This is computed as <i>n / (2*k)</i>.
    * @param k the configured size of the sketch
    * @param n the total values presented to the sketch.
    * @return the levels bit pattern
    */
   static long computeBitPattern(final int k, final long n) {
     return n / (2L * k);
   }

   /**
    * Returns the log_base2 of x
    * @param x the given x
    * @return the log_base2 of x
    */
   static double lg(final double x) {
     return ( Math.log(x) / Math.log(2.0) );
   }

   /**
    * Zero-based position of the highest one-bit of the given long.
    * Returns minus one if num is zero.
    * @param num the given long
    * @return Zero-based position of the highest one-bit of the given long
    */
   static int hiBitPos(final long num) {
     return 63 - Long.numberOfLeadingZeros(num);
   }

   /**
    * Returns the zero-based bit position of the lowest zero bit of <i>bits</i> starting at
    * <i>startingBit</i>. If input is all ones, this returns 64.
    * @param bits the input bits as a long
    * @param startingBit the zero-based starting bit position. Only the low 6 bits are used.
    * @return the zero-based bit position of the lowest zero bit starting at <i>startingBit</i>.
    */
   static int lowestZeroBitStartingAt(final long bits, final int startingBit) {
     int pos = startingBit & 0X3F;
     long myBits = bits >>> pos;

     while ((myBits & 1L) != 0) {
       myBits = myBits >>> 1;
       pos++;
     }
     return pos;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.datasketches.quantiles;

	import static java.lang.Math.abs;
	import static java.lang.Math.ceil;
	import static java.lang.Math.exp;
	import static java.lang.Math.log;
	import static java.lang.Math.max;
	import static java.lang.Math.min;
	import static java.lang.Math.pow;
	import static java.lang.Math.round;
	import static org.apache.datasketches.Util.ceilingPowerOf2;
	import static org.apache.datasketches.Util.isPowerOf2;
	import static org.apache.datasketches.quantiles.DoublesSketch.MAX_K;
	import static org.apache.datasketches.quantiles.DoublesSketch.MIN_K;
	import static org.apache.datasketches.quantiles.PreambleUtil.COMPACT_FLAG_MASK;
	import static org.apache.datasketches.quantiles.PreambleUtil.EMPTY_FLAG_MASK;
	import static org.apache.datasketches.quantiles.PreambleUtil.ORDERED_FLAG_MASK;
	import static org.apache.datasketches.quantiles.PreambleUtil.READ_ONLY_FLAG_MASK;
	import static org.apache.datasketches.quantiles.PreambleUtil.extractFlags;

	import org.apache.datasketches.Family;
	import org.apache.datasketches.SketchesArgumentException;
	import org.apache.datasketches.memory.Memory;

	/**
	* Utility class for quantiles sketches.
	*
	* <p>This class contains a highly specialized sort called blockyTandemMergeSort().
	* It also contains methods that are used while building histograms and other common
	* functions.</p>
	*
	* @author Lee Rhodes
	*/
	final class Util {

	private Util() {}

	/**
	* The java line separator character as a String.
	*/
	static final String LS = System.getProperty("line.separator");

	/**
	* The tab character
	*/
	static final char TAB = '\t';

	/**
	* Computes the raw delta area between two quantile sketches for the
	* {@link #kolmogorovSmirnovTest(DoublesSketch, DoublesSketch, double)
	* Kolmogorov-Smirnov Test}
	* method.
	* @param sketch1 Input DoubleSketch 1
	* @param sketch2 Input DoubleSketch 2
	* @return the raw delta area between two quantile sketches
	*/
	public static double computeKSDelta(final DoublesSketch sketch1,
	final DoublesSketch sketch2) {
	final DoublesAuxiliary p = new DoublesAuxiliary(sketch1);
	final DoublesAuxiliary q = new DoublesAuxiliary(sketch2);

	final double[] pSamplesArr = p.auxSamplesArr_;
	final double[] qSamplesArr = q.auxSamplesArr_;
	final long[] pCumWtsArr = p.auxCumWtsArr_;
	final long[] qCumWtsArr = q.auxCumWtsArr_;
	final int pSamplesArrLen = pSamplesArr.length;
	final int qSamplesArrLen = qSamplesArr.length;

	final double n1 = sketch1.getN();
	final double n2 = sketch2.getN();

	//Compute D from the two distributions
	double deltaArea = 0.0;
	int i = getNextIndex(pSamplesArr, -1);
	int j = getNextIndex(qSamplesArr, -1);

	// We're done if either array reaches the end
	while ((i < pSamplesArrLen) && (j < qSamplesArrLen)) {
	final double pSample = pSamplesArr[i];
	final double qSample = qSamplesArr[j];
	final long pWt = pCumWtsArr[i];
	final long qWt = qCumWtsArr[j];
	final double pNormWt = pWt / n1;
	final double qNormWt = qWt / n2;
	final double pMinusQ = Math.abs(pNormWt - qNormWt);
	final double curD = deltaArea;
	deltaArea = Math.max(curD, pMinusQ);

	//Increment i or j or both
	if (pSample == qSample) {
	i = getNextIndex(pSamplesArr, i);
	j = getNextIndex(qSamplesArr, j);
	} else if (pSample < qSample) {
	i = getNextIndex(pSamplesArr, i);
	} else {
	j = getNextIndex(qSamplesArr, j);
	}
	}

	//This is D, the delta difference in area of the two distributions
	deltaArea = Math.max(deltaArea, Math.abs((pCumWtsArr[i] / n1) - (qCumWtsArr[j] / n2)));
	return deltaArea;
	}

	/**
	* Computes the adjusted delta area threshold for the
	* {@link #kolmogorovSmirnovTest(DoublesSketch, DoublesSketch, double) Kolmogorov-Smirnov Test}
	* method.
	* This adjusts the computed threshold by the error epsilons of the two given sketches.
	* See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
	* @param sketch1 Input DoubleSketch 1
	* @param sketch2 Input DoubleSketch 2
	* @param tgtPvalue Target p-value. Typically .001 to .1, e.g., .05.
	* @return the adjusted threshold to be compared with the raw delta area.
	*/
	public static double computeKSThreshold(final DoublesSketch sketch1,
	final DoublesSketch sketch2,
	final double tgtPvalue) {
	final double r1 = sketch1.getRetainedItems();
	final double r2 = sketch2.getRetainedItems();
	final double alpha = tgtPvalue;
	final double alphaFactor = Math.sqrt(-0.5 * Math.log(0.5 * alpha));
	final double deltaAreaThreshold = alphaFactor * Math.sqrt((r1 + r2) / (r1 * r2));
	final double eps1 = Util.getNormalizedRankError(sketch1.getK(), false);
	final double eps2 = Util.getNormalizedRankError(sketch2.getK(), false);

	final double adjDeltaAreaThreshold = deltaAreaThreshold + eps1 + eps2;
	return adjDeltaAreaThreshold;
	}

	/**
	* Performs the Kolmogorov-Smirnov Test between two quantiles sketches.
	* Note: if the given sketches have insufficient data or if the sketch sizes are too small,
	* this will return false.
	* @param sketch1 Input DoubleSketch 1
	* @param sketch2 Input DoubleSketch 2
	* @param tgtPvalue Target p-value. Typically .001 to .1, e.g., .05.
	* @return Boolean indicating whether we can reject the null hypothesis (that the sketches
	* reflect the same underlying distribution) using the provided tgtPValue.
	*/
	public static boolean kolmogorovSmirnovTest(final DoublesSketch sketch1,
	final DoublesSketch sketch2, final double tgtPvalue) {
	final double delta = computeKSDelta(sketch1, sketch2);
	final double thresh = computeKSThreshold(sketch1, sketch2, tgtPvalue);
	return delta > thresh;
	}

	private static final int getNextIndex(final double[] samplesArr, final int stIdx) {
	int idx = stIdx + 1;
	final int samplesArrLen = samplesArr.length;

	if (idx >= samplesArrLen) { return samplesArrLen; }

	// if we have a sequence of equal values, use the last one of the sequence
	final double val = samplesArr[idx];
	int nxtIdx = idx + 1;
	while ((nxtIdx < samplesArrLen) && (samplesArr[nxtIdx] == val)) {
	idx = nxtIdx;
	++nxtIdx;
	}
	return idx;
	}

	/**
	* Gets the normalized rank error given k and pmf for the Quantiles DoubleSketch and ItemsSketch.
	* @param k the configuation parameter
	* @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
	* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
	* @return if pmf is true, the normalized rank error for the getPMF() function.
	* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
	*/
	// constants were derived as the best fit to 99 percentile empirically measured max error in
	// thousands of trials
	public static double getNormalizedRankError(final int k, final boolean pmf) {
	return pmf
	? 1.854 / pow(k, 0.9657)
	: 1.576 / pow(k, 0.9726);
	}

	/**
	* Gets the approximate value of <em>k</em> to use given epsilon, the normalized rank error
	* for the Quantiles DoubleSketch and ItemsSketch.
	* @param epsilon the normalized rank error between zero and one.
	* @param pmf if true, this function returns the value of <em>k</em> assuming the input epsilon
	* is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function
	* returns the value of <em>k</em> assuming the input epsilon is the desired "single-sided"
	* epsilon for all the other queries.
	* @return the value of <i>k</i> given a value of epsilon.
	*/
	// constants were derived as the best fit to 99 percentile empirically measured max error in
	// thousands of trials
	public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
	//Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false.
	final double eps = max(epsilon, 6.395E-5);
	final double kdbl = pmf
	? exp(log(1.854 / eps) / 0.9657)
	: exp(log(1.576 / eps) / 0.9726);
	final double krnd = round(kdbl);
	final double del = abs(krnd - kdbl);
	//round to closest int if within 1 ppm of the int, otherwise use the ceiling.
	final int k = (int) ((del < 1E-6) ? krnd : ceil(kdbl));
	return max(MIN_K, min(MAX_K, k));
	}

	/**
	* Checks the validity of the given value k
	* @param k must be greater than 1 and less than 65536 and a power of 2.
	*/
	static void checkK(final int k) {
	if ((k < MIN_K) \|\| (k > MAX_K) \|\| !isPowerOf2(k)) {
	throw new SketchesArgumentException(
	"K must be >= " + MIN_K + " and <= " + MAX_K + " and a power of 2: " + k);
	}
	}

	/**
	* Checks the validity of the given family ID
	* @param familyID the given family ID
	*/
	static void checkFamilyID(final int familyID) {
	final Family family = Family.idToFamily(familyID);
	if (!family.equals(Family.QUANTILES)) {
	throw new SketchesArgumentException(
	"Possible corruption: Invalid Family: " + family.toString());
	}
	}

	/**
	* Checks the consistency of the flag bits and the state of preambleLong and the memory
	* capacity and returns the empty state.
	* @param preambleLongs the size of preamble in longs
	* @param flags the flags field
	* @param memCapBytes the memory capacity
	* @return the value of the empty state
	*/
	static boolean checkPreLongsFlagsCap(final int preambleLongs, final int flags, final long memCapBytes) {
	final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; //Preamble flags empty state
	final int minPre = Family.QUANTILES.getMinPreLongs(); //1
	final int maxPre = Family.QUANTILES.getMaxPreLongs(); //2
	final boolean valid = ((preambleLongs == minPre) && empty) \|\| ((preambleLongs == maxPre) && !empty);
	if (!valid) {
	throw new SketchesArgumentException(
	"Possible corruption: PreambleLongs inconsistent with empty state: " + preambleLongs);
	}
	checkHeapFlags(flags);
	if (memCapBytes < (preambleLongs << 3)) {
	throw new SketchesArgumentException(
	"Possible corruption: Insufficient capacity for preamble: " + memCapBytes);
	}
	return empty;
	}

	/**
	* Checks just the flags field of the preamble. Allowed flags are Read Only, Empty, Compact, and
	* ordered.
	* @param flags the flags field
	*/
	static void checkHeapFlags(final int flags) { //only used by checkPreLongsFlagsCap and test
	final int allowedFlags =
	READ_ONLY_FLAG_MASK \| EMPTY_FLAG_MASK \| COMPACT_FLAG_MASK \| ORDERED_FLAG_MASK;
	final int flagsMask = ~allowedFlags;
	if ((flags & flagsMask) > 0) {
	throw new SketchesArgumentException(
	"Possible corruption: Invalid flags field: " + Integer.toBinaryString(flags));
	}
	}

	/**
	* Checks just the flags field of an input Memory object. Returns true for a compact
	* sketch, false for an update sketch. Does not perform additional checks, including sketch
	* family.
	* @param srcMem the source Memory containing a sketch
	* @return true if flags indicate a compact sketch, otherwise false
	*/
	static boolean checkIsCompactMemory(final Memory srcMem) {
	// only reading so downcast is ok
	final int flags = extractFlags(srcMem);
	final int compactFlags = READ_ONLY_FLAG_MASK \| COMPACT_FLAG_MASK;
	return (flags & compactFlags) > 0;
	}

	/**
	* Checks the sequential validity of the given array of double values.
	* They must be unique, monotonically increasing and not NaN.
	* @param values the given array of double values
	*/
	static final void checkSplitPointsOrder(final double[] values) {
	if (values == null) {
	throw new SketchesArgumentException("Values cannot be null.");
	}
	final int lenM1 = values.length - 1;
	for (int j = 0; j < lenM1; j++) {
	if (values[j] < values[j + 1]) { continue; }
	throw new SketchesArgumentException(
	"Values must be unique, monotonically increasing and not NaN.");
	}
	}

	/**
	* Checks that the given fractional rank: <i>0 ≤ frank ≤ 1.0</i>.
	* @param frank the given fractional rank.
	*/
	static final void checkFractionalRankBounds(final double frank) {
	if ((frank < 0.0) \|\| (frank > 1.0)) {
	throw new SketchesArgumentException(
	"Fractional rank must be >= 0 and <= 1.0: " + frank);
	}
	}

	/**
	* Returns the number of retained valid items in the sketch given k and n.
	* @param k the given configured k of the sketch
	* @param n the current number of items seen by the sketch
	* @return the number of retained items in the sketch given k and n.
	*/
	static int computeRetainedItems(final int k, final long n) {
	final int bbCnt = computeBaseBufferItems(k, n);
	final long bitPattern = computeBitPattern(k, n);
	final int validLevels = computeValidLevels(bitPattern);
	return bbCnt + (validLevels * k);
	}

	/**
	* Returns the total item capacity of an updatable, non-compact combined buffer
	* given <i>k</i> and <i>n</i>. If total levels = 0, this returns the ceiling power of 2
	* size for the base buffer or the MIN_BASE_BUF_SIZE, whichever is larger.
	*
	* @param k sketch parameter. This determines the accuracy of the sketch and the
	* size of the updatable data structure, which is a function of <i>k</i> and <i>n</i>.
	*
	* @param n The number of items in the input stream
	* @return the current item capacity of the combined buffer
	*/
	static int computeCombinedBufferItemCapacity(final int k, final long n) {
	final int totLevels = computeNumLevelsNeeded(k, n);
	if (totLevels == 0) {
	final int bbItems = computeBaseBufferItems(k, n);
	return Math.max(2 * DoublesSketch.MIN_K, ceilingPowerOf2(bbItems));
	}
	return (2 + totLevels) * k;
	}

	/**
	* Computes the number of valid levels above the base buffer
	* @param bitPattern the bit pattern
	* @return the number of valid levels above the base buffer
	*/
	static int computeValidLevels(final long bitPattern) {
	return Long.bitCount(bitPattern);
	}

	/**
	* Computes the total number of logarithmic levels above the base buffer given the bitPattern.
	* @param bitPattern the given bit pattern
	* @return the total number of logarithmic levels above the base buffer
	*/
	static int computeTotalLevels(final long bitPattern) {
	return hiBitPos(bitPattern) + 1;
	}

	/**
	* Computes the total number of logarithmic levels above the base buffer given k and n.
	* This is equivalent to max(floor(lg(n/k), 0).
	* Returns zero if n is less than 2 * k.
	* @param k the configured size of the sketch
	* @param n the total values presented to the sketch.
	* @return the total number of levels needed.
	*/
	static int computeNumLevelsNeeded(final int k, final long n) {
	return 1 + hiBitPos(n / (2L * k));
	}

	/**
	* Computes the number of base buffer items given k, n
	* @param k the configured size of the sketch
	* @param n the total values presented to the sketch
	* @return the number of base buffer items
	*/
	static int computeBaseBufferItems(final int k, final long n) {
	return (int) (n % (2L * k));
	}

	/**
	* Computes the levels bit pattern given k, n.
	* This is computed as <i>n / (2*k)</i>.
	* @param k the configured size of the sketch
	* @param n the total values presented to the sketch.
	* @return the levels bit pattern
	*/
	static long computeBitPattern(final int k, final long n) {
	return n / (2L * k);
	}

	/**
	* Returns the log_base2 of x
	* @param x the given x
	* @return the log_base2 of x
	*/
	static double lg(final double x) {
	return ( Math.log(x) / Math.log(2.0) );
	}

	/**
	* Zero-based position of the highest one-bit of the given long.
	* Returns minus one if num is zero.
	* @param num the given long
	* @return Zero-based position of the highest one-bit of the given long
	*/
	static int hiBitPos(final long num) {
	return 63 - Long.numberOfLeadingZeros(num);
	}

	/**
	* Returns the zero-based bit position of the lowest zero bit of <i>bits</i> starting at
	* <i>startingBit</i>. If input is all ones, this returns 64.
	* @param bits the input bits as a long
	* @param startingBit the zero-based starting bit position. Only the low 6 bits are used.
	* @return the zero-based bit position of the lowest zero bit starting at <i>startingBit</i>.
	*/
	static int lowestZeroBitStartingAt(final long bits, final int startingBit) {
	int pos = startingBit & 0X3F;
	long myBits = bits >>> pos;

	while ((myBits & 1L) != 0) {
	myBits = myBits >>> 1;
	pos++;
	}
	return pos;
	}

	}