blob: 75798c20feb22eedb9c3794310870a6ac44e3f3e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.quantilescommon;
import static java.lang.Math.log;
import static java.lang.Math.pow;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import java.util.Objects;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.Util;
/**
* Utilities for the quantiles sketches.
*
* @author Lee Rhodes
*/
public final class QuantilesUtil {
private QuantilesUtil() {}
/**
* Checks that the given normalized rank: <i>0 &le; nRank &le; 1.0</i>.
* @param nRank the given normalized rank.
*/
public static final void checkNormalizedRankBounds(final double nRank) {
if ((nRank < 0.0) || (nRank > 1.0)) {
throw new SketchesArgumentException(
"A normalized rank must be >= 0 and <= 1.0: " + nRank);
}
}
/**
* Checks the sequential validity of the given array of double values.
* They must be unique, monotonically increasing and not NaN.
* @param values the given array of double values
*/
public static final void checkDoublesSplitPointsOrder(final double[] values) {
Objects.requireNonNull(values);
final int len = values.length;
if (len == 1 && Double.isNaN(values[0])) {
throw new SketchesArgumentException(
"Values must be unique, monotonically increasing and not NaN.");
}
for (int j = 0; j < len - 1; j++) {
if (values[j] < values[j + 1]) { continue; }
throw new SketchesArgumentException(
"Values must be unique, monotonically increasing and not NaN.");
}
}
/**
* Checks the sequential validity of the given array of float values.
* They must be unique, monotonically increasing and not NaN.
* @param values the given array of double values
*/
public static final void checkFloatsSplitPointsOrder(final float[] values) {
Objects.requireNonNull(values);
final int len = values.length;
if (len == 1 && Float.isNaN(values[0])) {
throw new SketchesArgumentException(
"Values must be unique, monotonically increasing and not NaN.");
}
for (int j = 0; j < len - 1; j++) {
if (values[j] < values[j + 1]) { continue; }
throw new SketchesArgumentException(
"Values must be unique, monotonically increasing and not NaN.");
}
}
/**
* Returns an array of (<i>num</i> + 1) values that define equally sized intervals between 0.0, inclusive, and 1.0,
* inclusive. The end points 0.0 and 1.0 are part of the returned array.
*
* <p>For example, if num == 2, three values will be returned: 0.0, .5, and 1, where the two equally sized regions
* are {0.0,0.5}, and {0.5, 1.0}.</p>
* @param num the total number of equally sized intervals between 0.0, inclusive and 1.0, inclusive.
* Must be 1 or greater.
* @return a double array of values that define (num + 1) equally sized intervals between 0.0, inclusive and 1.0,
* inclusive.
* @throws IllegalArgumentException if <i>num</i> is less than 1.
*/
public static double[] equallySpacedDoubles(final int num) {
if (num < 1) { throw new IllegalArgumentException("num must be >= 1"); }
final double[] out = new double[num + 1];
out[0] = 0.0;
out[num] = 1.0;
final double delta = 1.0 / num;
for (int i = 1; i < num; i++) { out[i] = i * delta; }
return out;
}
/**
* Returns an array of (<i>num</i> + 1) longs that define, approximately, equally spaced intervals between the given
* <i>max</i>, inclusive, and <i>min</i>, inclusive. The end points <i>max</i> and <i>min</i> are part of the
* returned array. Because the range of the values may not exactly divide into <i>num</i> intervals,
* the size of these intervals may vary by plus or minus one.
* @param min the lowest positive valued (or zero) number of the range
* @param max the highest positive valued number of the range. <i>max</i> must be greater than <i>min</i>
* @param num Number of requested intervals. Must be greater or equal to one, and less than or equal to
* <i>max - min</i>.
*
* @return an array of (<i>num</i> + 1) longs that are approximately equally spaced between the given min and max.
*/
public static long[] equallySpacedLongs(final long min, final long max, final int num) {
if (num < 1 || min < 0 || max < 1 || (min >= max) || num > (max - min)) {
throw new SketchesArgumentException(
"Improper inputs: n < 1, min < 0, max < 1, min >= max, or n > (max - min)");
}
final long span = (max - min);
final double[] splits = equallySpacedDoubles(num);
final int len = num + 1;
final long[] out = new long[len];
long prev = -1L;
for (int i = 0; i < len; i++) {
long cur = Math.round(splits[i] * span);
if (cur == prev) { cur++; } else { prev = cur; }
out[i] = min + cur;
}
return out;
}
/**
* Returns a float array of evenly spaced values between value1, inclusive, and value2 inclusive.
* If value2 &gt; value1, the resulting sequence will be increasing.
* If value2 &lt; value1, the resulting sequence will be decreasing.
* @param value1 will be in index 0 of the returned array
* @param value2 will be in the highest index of the returned array
* @param num the total number of values including value1 and value2. Must be 2 or greater.
* @return a float array of evenly spaced values between value1, inclusive, and value2 inclusive.
*/
public static float[] evenlySpacedFloats(final float value1, final float value2, final int num) {
if (num < 2) {
throw new SketchesArgumentException("num must be >= 2");
}
final float[] out = new float[num];
out[0] = value1;
out[num - 1] = value2;
if (num == 2) { return out; }
final float delta = (value2 - value1) / (num - 1);
for (int i = 1; i < num - 1; i++) { out[i] = i * delta + value1; }
return out;
}
/**
* Returns a double array of evenly spaced values between value1, inclusive, and value2 inclusive.
* If value2 &gt; value1, the resulting sequence will be increasing.
* If value2 &lt; value1, the resulting sequence will be decreasing.
* @param value1 will be in index 0 of the returned array
* @param value2 will be in the highest index of the returned array
* @param num the total number of values including value1 and value2. Must be 2 or greater.
* @return a float array of evenly spaced values between value1, inclusive, and value2 inclusive.
*/
public static double[] evenlySpacedDoubles(final double value1, final double value2, final int num) {
if (num < 2) {
throw new SketchesArgumentException("num must be >= 2");
}
final double[] out = new double[num];
out[0] = value1;
out[num - 1] = value2;
if (num == 2) { return out; }
final double delta = (value2 - value1) / (num - 1);
for (int i = 1; i < num - 1; i++) { out[i] = i * delta + value1; }
return out;
}
/**
* Returns a double array of values between min and max inclusive where the log of the
* returned values are evenly spaced.
* If value2 &gt; value1, the resulting sequence will be increasing.
* If value2 &lt; value1, the resulting sequence will be decreasing.
* @param value1 will be in index 0 of the returned array, and must be greater than zero.
* @param value2 will be in the highest index of the returned array, and must be greater than zero.
* @param num the total number of values including value1 and value2. Must be 2 or greater
* @return a double array of exponentially spaced values between value1 and value2 inclusive.
*/
public static double[] evenlyLogSpaced(final double value1, final double value2, final int num) {
if (num < 2) {
throw new SketchesArgumentException("num must be >= 2");
}
if (value1 <= 0 || value2 <= 0) {
throw new SketchesArgumentException("value1 and value2 must be > 0.");
}
final double[] arr = evenlySpacedDoubles(log(value1) / Util.LOG2, log(value2) / Util.LOG2, num);
for (int i = 0; i < arr.length; i++) { arr[i] = pow(2.0,arr[i]); }
return arr;
}
/** used in search to improve rounding over a wide dynamic range */
public static final double tailRoundingFactor = 1e7;
/**
* Computes the closest Natural Rank from a given Normalized Rank
* @param normalizedRank the given normalized rank
* @param totalN the total N
* @param searchCrit the search criterion.
* @return the closest Natural Rank from a given Normalized Rank
*/
public static double getNaturalRank(
final double normalizedRank,
final long totalN,
final QuantileSearchCriteria searchCrit) {
double naturalRank = (normalizedRank * totalN);
if (totalN <= tailRoundingFactor) {
naturalRank = Math.round(naturalRank * tailRoundingFactor) / tailRoundingFactor;
}
return (searchCrit == INCLUSIVE) ? (long)Math.ceil(naturalRank) : (long)Math.floor(naturalRank);
}
}