blob: eab38e2ab94ce583742d61f5da8cecb09b12e060 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.function.IntFunction;
import com.tdunning.math.stats.AVLTreeDigest;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.function.FieldNameValueSource;
public class PercentileAgg extends SimpleAggValueSource {
/**
 * Requested percentiles, each on a 0-100 scale (they are multiplied by 0.01 before being
 * handed to the digest). The first entry is the one used when buckets are sorted by this
 * aggregation.
 */
List<Double> percentiles;
/**
 * Creates a {@code "percentile"} aggregation over the given value source.
 *
 * @param vs the value source whose values are summarized
 * @param percentiles the percentiles to report; the list is stored by reference, not copied
 */
public PercentileAgg(ValueSource vs, List<Double> percentiles) {
super("percentile", vs);
this.percentiles = percentiles;
}
/**
 * Picks the accumulator implementation used to collect values for this aggregation.
 *
 * <p>If the argument is not a plain field reference it is accumulated through {@link Acc}.
 * For a field reference the schema field is looked up and:
 * <ul>
 *   <li>a field type without a number type is rejected with {@code BAD_REQUEST};</li>
 *   <li>a single-valued field is accumulated through {@link Acc} using the field type's
 *       own value source;</li>
 *   <li>a multi-valued field with docValues uses {@link PercentileSortedNumericAcc} for
 *       point fields and {@link PercentileSortedSetAcc} otherwise;</li>
 *   <li>a multi-valued field without docValues uses {@link PercentileUnInvertedFieldAcc},
 *       except for point fields, which are rejected with {@code BAD_REQUEST}.</li>
 * </ul>
 *
 * @param fcontext facet context giving access to the searcher and schema
 * @param numDocs not consulted by this implementation
 * @param numSlots number of slots the accumulator is initially sized for
 * @return the accumulator matching the argument's kind
 * @throws IOException if constructing the accumulator fails
 */
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
  final ValueSource source = getArg();
  if (!(source instanceof FieldNameValueSource)) {
    // arbitrary value source: nothing field-specific to decide
    return new Acc(source, fcontext, numSlots);
  }

  final String fieldName = ((FieldNameValueSource) source).getFieldName();
  final SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
  if (sf.getType().getNumberType() == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        name() + " aggregation not supported for " + sf.getType().getTypeName());
  }

  final boolean multiValued = sf.multiValued() || sf.getType().multiValuedFieldCache();
  if (!multiValued) {
    // single-valued numeric field: read it through the field type's value source
    return new Acc(sf.getType().getValueSource(sf, null), fcontext, numSlots);
  }

  if (!sf.hasDocValues()) {
    if (sf.getType().isPointField()) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          name() + " aggregation not supported for PointField w/o docValues");
    }
    return new PercentileUnInvertedFieldAcc(fcontext, sf, numSlots);
  }

  if (sf.getType().isPointField()) {
    return new PercentileSortedNumericAcc(fcontext, sf, numSlots);
  }
  return new PercentileSortedSetAcc(fcontext, sf, numSlots);
}
/**
 * Returns a new {@link Merger} bound to this aggregation.
 *
 * @param prototype not consulted by this implementation
 */
@Override
public FacetMerger createFacetMerger(Object prototype) {
return new Merger();
}
/**
 * Two percentile aggregations are equal when they wrap equal arguments and request
 * equal percentile lists.
 */
@Override
public boolean equals(Object o) {
  if (o instanceof PercentileAgg) {
    final PercentileAgg that = (PercentileAgg) o;
    return arg.equals(that.arg) && percentiles.equals(that.percentiles);
  }
  return false;
}
/** Combines the superclass hash with the hash of the requested percentile list. */
@Override
public int hashCode() {
  final int base = super.hashCode();
  return 31 * base + percentiles.hashCode();
}
/**
 * Parses {@code percentile(valsource,percent1[,percent2]*)} into a {@link PercentileAgg}.
 */
public static class Parser extends ValueSourceParser {
  /**
   * Reads the value source followed by one or more percentile arguments.
   *
   * @param fp the function parser positioned at the first argument
   * @return a {@link PercentileAgg} over the parsed value source and percentiles
   * @throws SyntaxError if no percentile is supplied, or if any supplied percentile is not
   *     a number within the inclusive range 0 to 100
   */
  @Override
  public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    List<Double> percentiles = new ArrayList<>();
    ValueSource vs = fp.parseValueSource();
    while (fp.hasMoreArguments()) {
      double val = fp.parseDouble();
      // Negated inclusive-range test: unlike "val<0 || val>100" this also rejects NaN,
      // for which every ordered comparison evaluates to false.
      if (!(val >= 0 && val <= 100)) {
        throw new SyntaxError("requested percentile must be between 0 and 100. got " + val);
      }
      percentiles.add(val);
    }
    if (percentiles.isEmpty()) {
      throw new SyntaxError("expected percentile(valsource,percent1[,percent2]*) EXAMPLE:percentile(myfield,50)");
    }
    return new PercentileAgg(vs, percentiles);
  }
}
/**
 * Converts a digest into the value reported for this aggregation.
 *
 * @param digest the digest to query; may be {@code null}
 * @return {@code null} when {@code digest} is {@code null}; a single {@code Double} when
 *     exactly one percentile was requested; otherwise a list with one quantile per
 *     requested percentile, in request order
 */
protected Object getValueFromDigest(AVLTreeDigest digest) {
  if (digest == null) {
    return null;
  }
  final int requested = percentiles.size();
  if (requested == 1) {
    // percentiles are on a 0-100 scale, the digest expects 0-1
    return digest.quantile(percentiles.get(0) * 0.01);
  }
  final List<Double> quantiles = new ArrayList<>(requested);
  for (int i = 0; i < requested; i++) {
    final double q = digest.quantile(percentiles.get(i) * 0.01);
    quantiles.add(q);
  }
  return quantiles;
}
/**
 * Accumulator that reads one double per document from a value source and feeds it into a
 * per-slot digest.
 */
class Acc extends SlotAcc.FuncSlotAcc {
  /** One digest per slot; an entry stays {@code null} until the slot receives a value. */
  protected AVLTreeDigest[] digests;
  /** Scratch buffer reused by {@link #getShardValue(int)} across calls. */
  protected ByteBuffer buf;
  /**
   * Lazily built cache of the first requested percentile for every slot, used for sorting.
   * {@code null} means "not computed"; it must be cleared whenever the slot layout changes.
   */
  protected double[] sortvals;

  public Acc(ValueSource values, FacetContext fcontext, int numSlots) {
    super(values, fcontext, numSlots);
    digests = new AVLTreeDigest[numSlots];
  }

  /** Adds the document's value to the slot's digest; documents without a value are skipped. */
  @Override
  public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) throws IOException {
    if (!values.exists(doc)) return;
    double val = values.doubleVal(doc);

    AVLTreeDigest digest = digests[slotNum];
    if (digest == null) {
      digests[slotNum] = digest = new AVLTreeDigest(100); // TODO: make compression configurable
    }

    digest.add(val);
  }

  /** Orders slots by their first requested percentile; empty slots sort lowest. */
  @Override
  public int compare(int slotA, int slotB) {
    if (sortvals == null) {
      fillSortVals();
    }
    return Double.compare(sortvals[slotA], sortvals[slotB]);
  }

  /** Computes the sort key of every slot; slots without a digest get negative infinity. */
  private void fillSortVals() {
    sortvals = new double[digests.length];
    double sortp = percentiles.get(0) * 0.01;
    for (int i = 0; i < digests.length; i++) {
      AVLTreeDigest digest = digests[i];
      if (digest == null) {
        sortvals[i] = Double.NEGATIVE_INFINITY;
      } else {
        sortvals[i] = digest.quantile(sortp);
      }
    }
  }

  /**
   * Returns the slot's result: the serialized digest when running as a shard, otherwise the
   * percentile value(s), or {@code null} for a slot that never received a value.
   */
  @Override
  public Object getValue(int slotNum) throws IOException {
    if (fcontext.isShard()) {
      return getShardValue(slotNum);
    }
    if (sortvals != null && percentiles.size() == 1) {
      // we've already calculated everything we need
      return digests[slotNum] != null ? sortvals[slotNum] : null;
    }
    return getValueFromDigest(digests[slotNum]);
  }

  /**
   * Serializes the slot's digest.
   *
   * @return the digest's compact byte form, or {@code null} if the slot has no digest
   */
  public Object getShardValue(int slot) throws IOException {
    AVLTreeDigest digest = digests[slot];
    if (digest == null) return null; // no values for this slot

    digest.compress();
    int sz = digest.byteSize();
    if (buf == null || buf.capacity() < sz) {
      buf = ByteBuffer.allocate(sz + (sz >> 1)); // oversize by 50%
    } else {
      buf.clear();
    }
    digest.asSmallBytes(buf);
    // copy only the bytes actually written; the scratch buffer is reused by the next call
    byte[] arr = Arrays.copyOf(buf.array(), buf.position());
    return arr;
  }

  /** Drops all digests and the sort-key cache, keeping the current slot count. */
  @Override
  public void reset() {
    digests = new AVLTreeDigest[digests.length];
    sortvals = null;
  }

  /**
   * Remaps the digests to a new slot layout. The sort-key cache is discarded as well: it is
   * indexed by slot, so values computed for the previous layout would be wrong (or out of
   * bounds) afterwards. It is rebuilt on the next {@link #compare(int, int)}.
   */
  @Override
  public void resize(Resizer resizer) {
    digests = resizer.resize(digests, null);
    sortvals = null;
  }
}
/**
 * Shared state and result handling for the docValues-based percentile accumulators.
 * Subclasses supply the code that reads a document's values into {@link #digests}.
 */
abstract class BasePercentileDVAcc extends DocValuesAcc {
  /** One digest per slot; an entry stays {@code null} until a subclass creates it. */
  AVLTreeDigest[] digests;
  /** Scratch buffer reused by {@link #getShardValue(int)} across calls. */
  protected ByteBuffer buf;
  /**
   * Lazily built cache of the first requested percentile for every slot, used for sorting.
   * {@code null} means "not computed"; it must be cleared whenever the slot layout changes.
   */
  double[] sortvals;

  public BasePercentileDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf);
    digests = new AVLTreeDigest[numSlots];
  }

  /** Orders slots by their first requested percentile; empty slots sort lowest. */
  @Override
  public int compare(int slotA, int slotB) {
    if (sortvals == null) {
      fillSortVals();
    }
    return Double.compare(sortvals[slotA], sortvals[slotB]);
  }

  /** Computes the sort key of every slot; slots without a digest get negative infinity. */
  private void fillSortVals() {
    sortvals = new double[digests.length];
    double sortp = percentiles.get(0) * 0.01;
    for (int i = 0; i < digests.length; i++) {
      AVLTreeDigest digest = digests[i];
      if (digest == null) {
        sortvals[i] = Double.NEGATIVE_INFINITY;
      } else {
        sortvals[i] = digest.quantile(sortp);
      }
    }
  }

  /**
   * Returns the slot's result: the serialized digest when running as a shard, otherwise the
   * percentile value(s), or {@code null} for a slot without a digest.
   */
  @Override
  public Object getValue(int slotNum) throws IOException {
    if (fcontext.isShard()) {
      return getShardValue(slotNum);
    }
    if (sortvals != null && percentiles.size() == 1) {
      // we've already calculated everything we need
      return digests[slotNum] != null ? sortvals[slotNum] : null;
    }
    return getValueFromDigest(digests[slotNum]);
  }

  /**
   * Serializes the slot's digest.
   *
   * @return the digest's compact byte form, or {@code null} if the slot has no digest
   */
  public Object getShardValue(int slot) throws IOException {
    AVLTreeDigest digest = digests[slot];
    if (digest == null) return null; // no values for this slot

    digest.compress();
    int sz = digest.byteSize();
    if (buf == null || buf.capacity() < sz) {
      buf = ByteBuffer.allocate(sz + (sz >> 1)); // oversize by 50%
    } else {
      buf.clear();
    }
    digest.asSmallBytes(buf);
    // copy only the bytes actually written; the scratch buffer is reused by the next call
    byte[] arr = Arrays.copyOf(buf.array(), buf.position());
    return arr;
  }

  /** Drops all digests and the sort-key cache, keeping the current slot count. */
  @Override
  public void reset() {
    digests = new AVLTreeDigest[digests.length];
    sortvals = null;
  }

  /**
   * Remaps the digests to a new slot layout. The sort-key cache is discarded as well: it is
   * indexed by slot, so values computed for the previous layout would be wrong (or out of
   * bounds) afterwards. It is rebuilt on the next {@link #compare(int, int)}.
   */
  @Override
  public void resize(Resizer resizer) {
    digests = resizer.resize(digests, null);
    sortvals = null;
  }
}
/**
 * Percentile accumulator that reads a field's values from {@link SortedNumericDocValues}
 * and converts each stored long to a double according to the field's number type.
 */
class PercentileSortedNumericAcc extends BasePercentileDVAcc {
  /** Doc values of the current segment; replaced on every {@link #setNextReader}. */
  SortedNumericDocValues values;

  public PercentileSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
  }

  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
  }

  @Override
  protected boolean advanceExact(int doc) throws IOException {
    return values.advanceExact(doc);
  }

  /** Adds every value of the current document to the slot's digest, creating it if needed. */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    AVLTreeDigest target = digests[slot];
    if (target == null) {
      target = new AVLTreeDigest(100);
      digests[slot] = target;
    }
    int remaining = values.docValueCount();
    while (remaining-- > 0) {
      target.add(getDouble(values.nextValue()));
    }
  }

  /**
   * Converts a stored long to a double based on the field's number type: FLOAT and DOUBLE
   * are decoded from their sortable encodings, INTEGER, LONG and DATE are widened as-is,
   * and any other type yields {@code 0.0}.
   */
  protected double getDouble(long val) {
    switch (sf.getType().getNumberType()) {
      case FLOAT:
        return NumericUtils.sortableIntToFloat((int) val);
      case DOUBLE:
        return NumericUtils.sortableLongToDouble(val);
      case INTEGER:
      case LONG:
      case DATE:
        return val;
      default:
        // this would never happen
        return 0.0d;
    }
  }
}
/**
 * Percentile accumulator that reads a field's values from {@link SortedSetDocValues},
 * decoding each term through the field type into a {@link Date} or {@link Number}.
 */
class PercentileSortedSetAcc extends BasePercentileDVAcc {
  /** Doc values of the current segment; replaced on every {@link #setNextReader}. */
  SortedSetDocValues values;

  public PercentileSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
  }

  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    super.setNextReader(readerContext);
    values = DocValues.getSortedSet(readerContext.reader(), sf.getName());
  }

  @Override
  protected boolean advanceExact(int doc) throws IOException {
    return values.advanceExact(doc);
  }

  /** Adds every value of the current document to the slot's digest, creating it if needed. */
  @Override
  protected void collectValues(int doc, int slot) throws IOException {
    AVLTreeDigest target = digests[slot];
    if (target == null) {
      target = new AVLTreeDigest(100);
      digests[slot] = target;
    }
    for (long ord = values.nextOrd();
         ord != SortedSetDocValues.NO_MORE_ORDS;
         ord = values.nextOrd()) {
      final BytesRef term = values.lookupOrd(ord);
      final Object decoded = sf.getType().toObject(sf, term);
      final double val;
      if (decoded instanceof Date) {
        // dates contribute their epoch-millisecond value
        val = ((Date) decoded).getTime();
      } else {
        val = ((Number) decoded).doubleValue();
      }
      target.add(val);
    }
  }
}
/**
 * Percentile accumulator for fields read through the un-inverted field structure. Term
 * ordinals of each document are delivered to {@link #call(int)}, which decodes them and adds
 * the value to the digest of the slot currently being collected.
 */
class PercentileUnInvertedFieldAcc extends UnInvertedFieldAcc {
  /** One digest per slot; an entry stays {@code null} until the slot receives a value. */
  protected AVLTreeDigest[] digests;
  /** Scratch buffer reused by {@link #getShardValue(int)} across calls. */
  protected ByteBuffer buf;
  /**
   * Lazily built cache of the first requested percentile for every slot, used for sorting.
   * {@code null} means "not computed"; it must be cleared whenever the slot layout changes.
   */
  protected double[] sortvals;
  /** Slot targeted by the in-progress {@link #collect} call; read by {@link #call(int)}. */
  private int currentSlot;

  public PercentileUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
    super(fcontext, sf, numSlots);
    digests = new AVLTreeDigest[numSlots];
  }

  /** Records the target slot, then asks the un-inverted field to report the document's terms. */
  @Override
  public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
    this.currentSlot = slot;
    docToTerm.getBigTerms(doc + currentDocBase, this);
    docToTerm.getSmallTerms(doc + currentDocBase, this);
  }

  /** Orders slots by their first requested percentile; empty slots sort lowest. */
  @Override
  public int compare(int slotA, int slotB) {
    if (sortvals == null) {
      fillSortVals();
    }
    return Double.compare(sortvals[slotA], sortvals[slotB]);
  }

  /** Computes the sort key of every slot; slots without a digest get negative infinity. */
  private void fillSortVals() {
    sortvals = new double[digests.length];
    double sortp = percentiles.get(0) * 0.01;
    for (int i = 0; i < digests.length; i++) {
      AVLTreeDigest digest = digests[i];
      if (digest == null) {
        sortvals[i] = Double.NEGATIVE_INFINITY;
      } else {
        sortvals[i] = digest.quantile(sortp);
      }
    }
  }

  /**
   * Returns the slot's result: the serialized digest when running as a shard, otherwise the
   * percentile value(s), or {@code null} for a slot that never received a value.
   */
  @Override
  public Object getValue(int slotNum) throws IOException {
    if (fcontext.isShard()) {
      return getShardValue(slotNum);
    }
    if (sortvals != null && percentiles.size() == 1) {
      // we've already calculated everything we need
      return digests[slotNum] != null ? sortvals[slotNum] : null;
    }
    return getValueFromDigest(digests[slotNum]);
  }

  /**
   * Serializes the slot's digest.
   *
   * @return the digest's compact byte form, or {@code null} if the slot has no digest
   */
  public Object getShardValue(int slot) throws IOException {
    AVLTreeDigest digest = digests[slot];
    if (digest == null) return null; // no values for this slot

    digest.compress();
    int sz = digest.byteSize();
    if (buf == null || buf.capacity() < sz) {
      buf = ByteBuffer.allocate(sz + (sz >> 1)); // oversize by 50%
    } else {
      buf.clear();
    }
    digest.asSmallBytes(buf);
    // copy only the bytes actually written; the scratch buffer is reused by the next call
    byte[] arr = Arrays.copyOf(buf.array(), buf.position());
    return arr;
  }

  /** Drops all digests and the sort-key cache, keeping the current slot count. */
  @Override
  public void reset() {
    digests = new AVLTreeDigest[digests.length];
    sortvals = null;
  }

  /**
   * Remaps the digests to a new slot layout. The sort-key cache is discarded as well: it is
   * indexed by slot, so values computed for the previous layout would be wrong (or out of
   * bounds) afterwards. It is rebuilt on the next {@link #compare(int, int)}.
   */
  @Override
  public void resize(Resizer resizer) {
    digests = resizer.resize(digests, null);
    sortvals = null;
  }

  /**
   * Callback invoked once per term ordinal of the document being collected. Decodes the term
   * through the field type and adds it to the current slot's digest; dates contribute their
   * epoch-millisecond value.
   *
   * @throws UncheckedIOException wrapping any {@link IOException} raised while resolving the term
   */
  @Override
  public void call(int ord) {
    AVLTreeDigest digest = digests[currentSlot];
    if (digest == null) {
      digests[currentSlot] = digest = new AVLTreeDigest(100);
    }
    try {
      BytesRef term = docToTerm.lookupOrd(ord);
      Object obj = sf.getType().toObject(sf, term);
      double val = obj instanceof Date ? ((Date) obj).getTime() : ((Number) obj).doubleValue();
      digest.add(val);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }
}
/**
 * Merges serialized digests coming from several sources into one digest and exposes the
 * resulting percentile value(s), both as the merged result and as a sort key.
 */
class Merger extends FacetModule.FacetSortableMerger {
  /** Digest accumulated so far; {@code null} until a non-null contribution is merged. */
  protected AVLTreeDigest digest;
  /** Cached sort key (first requested percentile); {@code null} when it must be recomputed. */
  protected Double sortVal;

  /**
   * Folds one serialized digest into the merged state.
   *
   * @param facetResult a {@code byte[]} holding a serialized digest, or {@code null}
   * @param mcontext not consulted by this implementation
   */
  @Override
  public void merge(Object facetResult, Context mcontext) {
    byte[] arr = (byte[]) facetResult;
    if (arr == null) return; // an explicit null can mean no values in the field
    AVLTreeDigest subDigest = AVLTreeDigest.fromBytes(ByteBuffer.wrap(arr));
    if (digest == null) {
      digest = subDigest;
    } else {
      digest.add(subDigest);
    }
    // The digest just changed, so a sort key computed from its earlier state is stale.
    sortVal = null;
  }

  /**
   * @return {@code null} if nothing was merged; a single {@code Double} when exactly one
   *     percentile was requested; otherwise a list with one value per requested percentile
   */
  @Override
  public Object getMergedResult() {
    if (percentiles.size() == 1 && digest != null) return getSortVal();
    return getValueFromDigest(digest);
  }

  /**
   * Compares by the first requested percentile in ascending numeric order; the
   * {@code direction} argument is not consulted here.
   */
  @Override
  public int compareTo(FacetModule.FacetSortableMerger other, FacetRequest.SortDirection direction) {
    return Double.compare(getSortVal(), ((Merger) other).getSortVal());
  }

  /** Returns the cached sort key, computing it on demand; an empty merger sorts as negative infinity. */
  private Double getSortVal() {
    if (sortVal == null) {
      sortVal = digest == null ? Double.NEGATIVE_INFINITY : digest.quantile(percentiles.get(0) * 0.01);
    }
    return sortVal;
  }
}
}