blob: ee53e910f878edf8cdaab824ef6ca87a7b80655a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.quantilescommon;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import org.apache.datasketches.common.SketchesStateException;
/**
* This defines the returned results of the getParitionBoundaries() function and
* includes the basic methods needed to construct actual partitions.
*/
public final class GenericPartitionBoundaries<T> {
private long totalN; //totalN of source sketch
private T[] boundaries; //quantiles at the boundaries
private long[] natRanks; //natural ranks at the boundaries
private double[] normRanks; //normalized ranks at the boundaries
private T maxItem; //of the source sketch
private T minItem; //of the source sketch
private QuantileSearchCriteria searchCrit; //of the source sketch query to getPartitionBoundaries.
//computed
private long[] numDeltaItems; //num of items in each partition
private int numPartitions; //num of partitions
public GenericPartitionBoundaries(
final long totalN,
final T[] boundaries,
final long[] natRanks,
final double[] normRanks,
final T maxItem,
final T minItem,
final QuantileSearchCriteria searchCrit) {
this.totalN = totalN;
this.boundaries = boundaries; //SpotBugs EI_EXPOSE_REP2 OK: copying from sketch class to this "friend" class.
this.natRanks = natRanks; // "
this.normRanks = normRanks; // "
this.maxItem = maxItem;
this.minItem = minItem;
this.searchCrit = searchCrit;
//check and compute
final int len = boundaries.length;
if (len < 2) { throw new SketchesStateException("Source sketch is empty"); } //class is final, this is ok
numDeltaItems = new long[len];
numDeltaItems[0] = 0; // index 0 is always 0
for (int i = 1; i < len; i++) {
final int addOne = ( (i == 1 && (this.searchCrit == INCLUSIVE))
|| ((i == (len - 1)) && this.searchCrit == EXCLUSIVE) ) ? 1 : 0;
numDeltaItems[i] = natRanks[i] - natRanks[i - 1] + addOne;
}
this.numPartitions = len - 1;
}
/**
* Gets the length of the input stream offered to the underlying sketch.
* @return the length of the input stream offered to the underlying sketch.
*/
public long getN() { return totalN; }
/**
* Gets an ordered array of boundaries that sequentially define the upper and lower boundaries of partitions.
* These partitions are to be constructed by an external process. Each boundary is essentially a reference and
* should uniquely identify an item or a set of identical items from the original stream of data fed to the
* originating sketch.
*
* <p>Assume boundaries array has size N + 1. Let the indicies be sequentially numbered from 0 to N.
* The number of partitions is always one less than the size of the boundaries array.
* Let the the partitions be sequentially numbered from 1 to N.
*
* <p>If these results were computed using QuantileSearchCriteria.INCLUSIVE then these sequential boundaries
* are to be interpreted as follows:
* <ul>
* <li>Partition 1: include all items &ge; index 0 and &le; index 1.</li>
* <li>Partition 2: include all items &gt; index 1 and &le; index 2.</li>
* <li>Partition N: include all items &gt; index N-1 and &le; index N.</li>
* </ul>
*
* <p>If these results were computed using QuantileSearchCriteria.EXCLUSIVE then these sequential boundaries
* are to be interpreted as follows:
* <ul>
* <li>Partition 1: include all items &ge; index 0 and &lt; index 1.</li>
* <li>Partition 2: include all items &ge; index 1 and &lt; index 2.</li>
* <li>Partition N: include all items &ge; index N-1 and &le; index N.</li>
* </ul>
*
* @return an array of boundaries that sequentially define the upper and lower boundaries of partitions.
*/
public T[] getBoundaries() { return boundaries.clone(); }
/**
* Gets an ordered array of natural ranks of the associated array of partition boundaries utilizing
* a specified search criterion. Natural ranks are integral values on the interval [1, N]
* @return an array of natural ranks.
*/
public long[] getNaturalRanks() { return natRanks.clone(); }
/**
* Gets an ordered array of normalized ranks of the associated array of partition boundaries utilizing
* a specified search criterion. Normalized ranks are double values on the interval [0.0, 1.0].
* @return an array of normalized ranks.
*/
public double[] getNormalizedRanks() { return normRanks.clone(); }
/**
* Gets the number of items to be included for each partition as an array.
* The count at index 0 is 0. The number of items included in the first partition, defined by the boundaries at
* index 0 and index 1, is at index 1 in this array, etc.
* @return the number of items to be included for each partition as an array.
*/
public long[] getNumDeltaItems() { return numDeltaItems.clone(); }
/**
* Gets the number of partitions
* @return the number of partitions
*/
public int getNumPartitions() { return numPartitions; }
/**
* Returns the maximum item of the stream. This may be distinct from the largest item retained by the
* sketch algorithm.
*
* @return the maximum item of the stream
* @throws IllegalArgumentException if sketch is empty.
*/
public T getMaxItem() { return maxItem; }
/**
* Returns the minimum item of the stream. This may be distinct from the smallest item retained by the
* sketch algorithm.
*
* @return the minimum item of the stream
* @throws IllegalArgumentException if sketch is empty.
*/
public T getMinItem() { return minItem; }
/**
* Gets the search criteria specified for the source sketch
* @return The search criteria specified for the source sketch
*/
public QuantileSearchCriteria getSearchCriteria() { return searchCrit; }
}