blob: 5157838253388458aa1c899c0fa54f2fadcc172d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.quantilescommon;
import static java.lang.Math.min;
import static org.apache.datasketches.quantilescommon.GenericInequalitySearch.find;
import static org.apache.datasketches.quantilescommon.IncludeMinMax.ItemsPair;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles;
import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank;
import java.lang.reflect.Array;
import java.util.Comparator;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality;
/**
* The SortedView for the KllItemsSketch and the classic ItemsSketch.
* @param <T> The sketch data type
* @author Alexander Saydakov
* @author Lee Rhodes
*/
public class ItemsSketchSortedView<T> implements GenericSortedView<T> {
private final T[] quantiles;
private final long[] cumWeights; //cumulative natural weights
private final long totalN;
private final Comparator<? super T> comparator;
private final Class<T> clazz;
private final double normRankError;
private final int numRetItems;
/**
* Constructor.
* @param quantiles the given array of quantiles, which must be ordered.
* @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and
* the last value must be equal to N, the total number of items updated to the sketch.
* @param sk the underlying quantile sketch.
*/
public ItemsSketchSortedView(
final T[] quantiles,
final long[] cumWeights, //or Natural Ranks
final QuantilesGenericAPI<T> sk) {
this.comparator = sk.getComparator();
final ItemsPair<T> iPair =
IncludeMinMax.includeItemsMinMax(quantiles, cumWeights, sk.getMaxItem(), sk.getMinItem(), comparator);
this.quantiles = iPair.quantiles;
this.cumWeights = iPair.cumWeights;
this.totalN = sk.getN();
this.clazz = sk.getClassOfT();
this.normRankError = sk.getNormalizedRankError(true);
this.numRetItems = sk.getNumRetained();
}
//Used for testing
ItemsSketchSortedView(
final T[] quantiles,
final long[] cumWeights,
final long totalN,
final Comparator<? super T> comparator,
final T maxItem,
final T minItem,
final Class<T> clazz,
final double normRankError,
final int numRetItems) {
this.comparator = comparator;
final ItemsPair<T> iPair =
IncludeMinMax.includeItemsMinMax(quantiles, cumWeights, maxItem, minItem, comparator);
this.quantiles = iPair.quantiles;
this.cumWeights = iPair.cumWeights;
this.totalN = totalN;
this.clazz = clazz;
this.normRankError = normRankError;
this.numRetItems = numRetItems;
}
//end of constructors
@Override
public Comparator<? super T> getComparator() { return comparator; }
@Override
public long[] getCumulativeWeights() {
return cumWeights.clone();
}
@Override
public T getMaxItem() {
final int top = quantiles.length - 1;
return quantiles[top];
}
@Override
public T getMinItem() {
return quantiles[0];
}
@Override
public long getN() {
return totalN;
}
@Override
public int getNumRetained() {
return quantiles.length;
}
@Override
public int getMaxPartitions() {
return (int) min(1.0 / normRankError, numRetItems / 2.0);
}
@Override
public GenericPartitionBoundaries<T> getPartitionBoundariesFromPartSize(
final long nominalPartitionSize,
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(QuantilesAPI.EMPTY_MSG); }
final long minPartSizeItems = getMinPartitionSizeItems();
if (nominalPartitionSize < minPartSizeItems) {
throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
+ " The requested nominal partition size is too small for this sketch.");
}
final long totalN = this.totalN;
final int numEquallySizedParts = (int) min(totalN / minPartSizeItems, getMaxPartitions());
return getPartitionBoundariesFromNumParts(numEquallySizedParts);
}
@Override
@SuppressWarnings("unchecked")
public GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(
final int numEquallySizedParts,
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(QuantilesAPI.EMPTY_MSG); }
final int maxParts = getMaxPartitions();
if (numEquallySizedParts > maxParts) {
throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
+ " The requested number of partitions is too large for this sketch.");
}
final double[] searchNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySizedParts + 1);
final int partArrLen = searchNormRanks.length;
final T[] partQuantiles = (T[]) Array.newInstance(clazz, partArrLen);
final long[] partNatRanks = new long[partArrLen];
final double[] partNormRanks = new double[partArrLen];
//compute the quantiles and natural and normalized ranks for the partition boundaries.
for (int i = 0; i < partArrLen; i++) {
final int index = getQuantileIndex(searchNormRanks[i], cumWeights, searchCrit);
partQuantiles[i] = quantiles[index];
final long cumWt = cumWeights[index];
partNatRanks[i] = cumWt;
partNormRanks[i] = (double)cumWt / totalN;
}
//Return the GPB of the complete specification of the boundaries.
final GenericPartitionBoundaries<T> gpb = new GenericPartitionBoundaries<>(
this.totalN,
partQuantiles,
partNatRanks,
partNormRanks,
getMaxItem(),
getMinItem(),
searchCrit);
return gpb;
} //End of getPartitionBoundaries
@Override
public T getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
QuantilesUtil.checkNormalizedRankBounds(rank);
final int index = getQuantileIndex(rank, cumWeights, searchCrit);
return quantiles[index];
}
private int getQuantileIndex(final double normRank, final long[] localCumWeights,
final QuantileSearchCriteria searchCrit) {
final int len = localCumWeights.length;
final double naturalRank = getNaturalRank(normRank, totalN, searchCrit);
final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
final int index = InequalitySearch.find(localCumWeights, 0, len - 1, naturalRank, crit);
if (index == -1) { return len - 1; }
return index;
}
/**
* Gets an array of quantiles corresponding to the given array of ranks.
* @param ranks the given array of normalized ranks
* @param searchCrit The search criterion: either INCLUSIVE or EXCLUSIVE.
* @return an array of quantiles corresponding to the given array of ranks.
*/
@SuppressWarnings("unchecked")
public T[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
final int len = ranks.length;
final T[] quants = (T[]) Array.newInstance(clazz, len);
for (int i = 0; i < len; i++) {
quants[i] = getQuantile(ranks[i], searchCrit);
}
return quants;
}
@Override
public T[] getQuantiles() {
return quantiles.clone();
}
@Override
public double getRank(final T quantile, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
final int len = quantiles.length;
final Inequality crit = (searchCrit == INCLUSIVE) ? Inequality.LE : Inequality.LT;
final int index = find(quantiles, 0, len - 1, quantile, crit, comparator);
if (index == -1) {
return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile
}
return (double)cumWeights[index] / totalN;
}
@Override
public boolean isEmpty() {
return totalN == 0;
}
@Override
public GenericSortedViewIterator<T> iterator() {
return new GenericSortedViewIterator<>(quantiles, cumWeights);
}
}