blob: f3319ee97fdda9d6d6fe6cbc6002dac6d03c9a6a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
/** Holds statistics for a DocValues field. */
public abstract class DocValuesStats<T> {
/** Number of documents processed so far that had no value for the field. */
private int missing;

/** Number of documents processed so far that had a value for the field. */
private int count;

/** Name of the DocValues field these statistics are computed for. */
protected final String field;

/** Smallest value seen so far; holds the constructor's {@code initialMin} until updated by a subclass. */
protected T min;

/** Largest value seen so far; holds the constructor's {@code initialMax} until updated by a subclass. */
protected T max;

/**
 * Creates an empty statistics holder.
 *
 * @param field
 *          name of the DocValues field to compute statistics for.
 * @param initialMin
 *          value {@link #min()} returns until a subclass overwrites it.
 * @param initialMax
 *          value {@link #max()} returns until a subclass overwrites it.
 */
protected DocValuesStats(String field, T initialMin, T initialMax) {
  this.max = initialMax;
  this.min = initialMin;
  this.field = field;
}
/**
 * Hook invoked by {@link #accumulate(int)} once {@link #hasValue(int)} has returned {@code true} for the current
 * document. Implementations should update the statistics based on the value of the current document.
 *
 * @param count
 *          the updated number of documents with value for this field; it already includes the current document.
 * @throws IOException
 *           if the implementation fails while reading the document's value.
 */
protected abstract void doAccumulate(int count) throws IOException;
/**
 * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e.
 * it does have the requested DocValues field). The implementations in this file look the field up on
 * {@code context.reader()} and return {@code false} when the reader has no such doc values.
 *
 * @param context
 *          the segment (leaf) whose doc values should be read from now on.
 * @return {@code true} if the segment has the requested DocValues field, {@code false} otherwise.
 * @throws IOException
 *           if obtaining the doc values from the reader fails.
 */
protected abstract boolean init(LeafReaderContext context) throws IOException;
/**
 * Returns whether the given document has a value for the requested DocValues field. The implementations in this file
 * answer by calling {@code advanceExact(doc)} on the doc values obtained in {@link #init(LeafReaderContext)}.
 * NOTE(review): presumably only called after {@code init} returned {@code true}; otherwise those doc values are
 * {@code null} -- confirm against the caller.
 *
 * @param doc
 *          the document to check.
 */
protected abstract boolean hasValue(int doc) throws IOException;
/**
 * Processes one document: documents without a value only bump the missing counter, all others bump the value counter
 * and are handed to {@link #doAccumulate(int)} together with the updated counter.
 */
final void accumulate(int doc) throws IOException {
  if (!hasValue(doc)) {
    missing++;
    return;
  }
  count++;
  doAccumulate(count);
}
/** Records one more document that has no value for the field, without consulting the doc values. */
final void addMissing() {
  missing += 1;
}
/** Returns the name of the field for which these stats were computed. */
public final String field() {
  return this.field;
}
/** Returns how many of the processed documents have a value of the field. */
public final int count() {
  return this.count;
}
/** Returns how many of the processed documents do not have a value of the field. */
public final int missing() {
  return this.missing;
}
/** Returns the minimum value of the field. The result is undefined when {@link #count()} is zero. */
public final T min() {
  return this.min;
}
/** Returns the maximum value of the field. The result is undefined when {@link #count()} is zero. */
public final T max() {
  return this.max;
}
/** Base class for statistics over a numeric DocValues field, adding mean, variance, stdev and sum. */
public static abstract class NumericDocValuesStats<T extends Number> extends DocValuesStats<T> {

  /** Running mean of the accumulated values; updated by the subclasses. */
  protected double mean = 0.0;

  /** Running sum of squared deviations from the mean; {@link #variance()} divides it by {@link #count()}. */
  protected double variance = 0.0;

  /** Doc values of the current segment, assigned by {@link #init(LeafReaderContext)}. */
  protected NumericDocValues ndv;

  protected NumericDocValuesStats(String field, T initialMin, T initialMax) {
    super(field, initialMin, initialMax);
  }

  /** Fetches the field's numeric doc values from the segment reader; {@code false} if there are none. */
  @Override
  protected final boolean init(LeafReaderContext context) throws IOException {
    this.ndv = context.reader().getNumericDocValues(field);
    return this.ndv != null;
  }

  /** Positions the doc values on {@code doc} and reports whether that document has a value. */
  @Override
  protected final boolean hasValue(int doc) throws IOException {
    final boolean present = ndv.advanceExact(doc);
    return present;
  }

  /** Returns the mean of all values of the field. */
  public final double mean() {
    return this.mean;
  }

  /** Returns the variance of all values of the field, or zero when no document had a value. */
  public final double variance() {
    final int docsWithValue = count();
    if (docsWithValue > 0) {
      return variance / docsWithValue;
    }
    return 0;
  }

  /** Returns the standard deviation of all values of the field, i.e. the square root of the variance. */
  public final double stdev() {
    final double var = variance();
    return Math.sqrt(var);
  }

  /** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */
  public abstract T sum();
}
/** Statistics for a numeric DocValues field whose values are read as {@code long}s. */
public static final class LongDocValuesStats extends NumericDocValuesStats<Long> {

  // Kept as a primitive so that adding to it never boxes; it is only boxed when sum() is called.
  private long sum = 0;

  public LongDocValuesStats(String field) {
    super(field, Long.MAX_VALUE, Long.MIN_VALUE);
  }

  /**
   * Folds the current document's value into sum, min, max, mean and the running sum of squared deviations.
   *
   * @param count
   *          number of documents with a value so far, including the current one; used as the mean's divisor.
   */
  @Override
  protected void doAccumulate(int count) throws IOException {
    final long value = ndv.longValue();
    sum += value;
    if (value < min) {
      min = value;
    }
    if (value > max) {
      max = value;
    }
    // Incremental (Welford-style) update: the deviation term needs both the previous and the new mean.
    final double previousMean = mean;
    mean = previousMean + (value - previousMean) / count;
    variance += (value - mean) * (value - previousMean);
  }

  /** Returns the sum of all accumulated values; may have silently overflowed for large values. */
  @Override
  public Long sum() {
    return sum;
  }
}
/**
 * Holds DocValues statistics for a numeric field storing {@code double} values. Each stored {@code long} is decoded
 * with {@link Double#longBitsToDouble(long)} before it is accumulated.
 */
public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> {

  // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable.
  private double sum = 0;

  /**
   * Creates empty statistics for {@code field}.
   *
   * @param field
   *          name of the numeric DocValues field holding encoded doubles.
   */
  public DoubleDocValuesStats(String field) {
    // Seed min/max with the infinities. Double.MIN_VALUE is the smallest *positive* double, so using it as the
    // initial max would make max() wrong whenever every value is negative or zero; likewise Double.MAX_VALUE as
    // the initial min would be wrong when every value is +Infinity.
    super(field, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
  }

  /**
   * Folds the current document's value into min, max, sum, mean and the running sum of squared deviations.
   *
   * @param count
   *          number of documents with a value so far, including the current one; used as the mean's divisor.
   */
  @Override
  protected void doAccumulate(int count) throws IOException {
    double val = Double.longBitsToDouble(ndv.longValue());
    if (Double.compare(val, max) > 0) {
      max = val;
    }
    if (Double.compare(val, min) < 0) {
      min = val;
    }
    sum += val;
    // Incremental mean/variance update: the deviation term needs both the previous and the new mean.
    double oldMean = mean;
    mean += (val - mean) / count;
    variance += (val - mean) * (val - oldMean);
  }

  /** Returns the sum of all accumulated values. */
  @Override
  public Double sum() {
    return sum;
  }
}
/**
 * Holds statistics for a sorted-numeric DocValues field, where a single document may contribute several values.
 * {@link #count()} is the number of documents with at least one value, {@link #valuesCount()} the number of values.
 */
public static abstract class SortedNumericDocValuesStats<T extends Number> extends DocValuesStats<T> {

  /** Total number of values accumulated across all documents. */
  protected long valuesCount = 0;

  /** Running mean over all accumulated values. */
  protected double mean = 0.0;

  /** Running sum of squared deviations from the mean; normalized by {@link #variance()}. */
  protected double variance = 0.0;

  /** Doc values of the current segment, assigned by {@link #init(LeafReaderContext)}. */
  protected SortedNumericDocValues sndv;

  protected SortedNumericDocValuesStats(String field, T initialMin, T initialMax) {
    super(field, initialMin, initialMax);
  }

  /** Fetches the field's sorted-numeric doc values from the segment reader; {@code false} if there are none. */
  @Override
  protected final boolean init(LeafReaderContext context) throws IOException {
    sndv = context.reader().getSortedNumericDocValues(field);
    return sndv != null;
  }

  /** Positions the doc values on {@code doc} and reports whether that document has any value. */
  @Override
  protected final boolean hasValue(int doc) throws IOException {
    return sndv.advanceExact(doc);
  }

  /** The mean of all values of the field. */
  public final double mean() {
    return mean;
  }

  /** Returns the variance of all values of the field, or zero when no value was accumulated. */
  public final double variance() {
    // The subclasses in this file add one squared-deviation term per *value* and compute the mean over
    // valuesCount, so the normalizer must be the number of values as well. Dividing by count() (documents)
    // would overstate the variance as soon as a document carries more than one value.
    return valuesCount > 0 ? variance / valuesCount : 0;
  }

  /** Returns the stdev of all values of the field. */
  public final double stdev() {
    return Math.sqrt(variance());
  }

  /** Returns the total number of values for this field. */
  public final long valuesCount() {
    return valuesCount;
  }

  /** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */
  public abstract T sum();
}
/** Statistics for a sorted-numeric DocValues field whose values are read as {@code long}s. */
public static final class SortedLongDocValuesStats extends SortedNumericDocValuesStats<Long> {

  // Kept as a primitive so that adding to it never boxes; it is only boxed when sum() is called.
  private long sum = 0;

  public SortedLongDocValuesStats(String field) {
    super(field, Long.MAX_VALUE, Long.MIN_VALUE);
  }

  /**
   * Folds every value of the current document into sum, min, max, mean and the running sum of squared deviations.
   * The {@code count} argument is not used here; the mean is computed over {@code valuesCount} instead.
   */
  @Override
  protected void doAccumulate(int count) throws IOException {
    final int valuesInDoc = sndv.docValueCount();
    for (int i = 0; i < valuesInDoc; i++) {
      final long value = sndv.nextValue();
      sum += value;
      if (value < min) {
        min = value;
      }
      if (value > max) {
        max = value;
      }
      // For a correct running average, valuesCount must grow with each value rather than once before the loop
      // starts.
      valuesCount++;
      final double previousMean = mean;
      mean = previousMean + (value - previousMean) / valuesCount;
      variance += (value - mean) * (value - previousMean);
    }
  }

  /** Returns the sum of all accumulated values; may have silently overflowed for large values. */
  @Override
  public Long sum() {
    return sum;
  }
}
/**
 * Holds DocValues statistics for a sorted-numeric field storing {@code double} values. Each stored {@code long} is
 * decoded with {@link Double#longBitsToDouble(long)} before it is accumulated.
 */
public static final class SortedDoubleDocValuesStats extends SortedNumericDocValuesStats<Double> {

  // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable.
  private double sum = 0;

  /**
   * Creates empty statistics for {@code field}.
   *
   * @param field
   *          name of the sorted-numeric DocValues field holding encoded doubles.
   */
  public SortedDoubleDocValuesStats(String field) {
    // Seed min/max with the infinities. Double.MIN_VALUE is the smallest *positive* double, so using it as the
    // initial max would make max() wrong whenever every value is negative or zero; likewise Double.MAX_VALUE as
    // the initial min would be wrong when every value is +Infinity.
    super(field, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
  }

  /**
   * Folds every value of the current document into min, max, sum, mean and the running sum of squared deviations.
   * The {@code count} argument is not used here; the mean is computed over {@code valuesCount} instead.
   */
  @Override
  protected void doAccumulate(int count) throws IOException {
    int numValues = sndv.docValueCount();
    while (numValues-- > 0) {
      double val = Double.longBitsToDouble(sndv.nextValue());
      if (Double.compare(val, max) > 0) {
        max = val;
      }
      if (Double.compare(val, min) < 0) {
        min = val;
      }
      sum += val;
      double oldMean = mean;
      // For a correct running average, increase valuesCount with each value rather than once before the loop
      // starts.
      ++valuesCount;
      mean += (val - mean) / valuesCount;
      variance += (val - mean) * (val - oldMean);
    }
  }

  /** Returns the sum of all accumulated values. */
  @Override
  public Double sum() {
    return sum;
  }
}
/**
 * Copies the bytes referenced by {@code src} into {@code dest}, reusing (and growing if needed) the array of
 * {@code dest}. When {@code dest} is {@code null} a fresh deep copy of {@code src} is returned instead.
 *
 * @return {@code dest} after the copy, or the newly created copy when {@code dest} was {@code null}.
 */
private static BytesRef copyFrom(BytesRef src, BytesRef dest) {
  if (dest != null) {
    final int len = src.length;
    dest.bytes = ArrayUtil.grow(dest.bytes, len);
    System.arraycopy(src.bytes, src.offset, dest.bytes, 0, len);
    dest.length = len;
    dest.offset = 0;
    return dest;
  }
  return BytesRef.deepCopyOf(src);
}
/** Statistics (min and max term) for a sorted DocValues field. */
public static class SortedDocValuesStats extends DocValuesStats<BytesRef> {

  /** Doc values of the current segment, assigned by {@link #init(LeafReaderContext)}. */
  protected SortedDocValues sdv;

  protected SortedDocValuesStats(String field) {
    super(field, null, null);
  }

  /** Fetches the field's sorted doc values from the segment reader; {@code false} if there are none. */
  @Override
  protected final boolean init(LeafReaderContext context) throws IOException {
    this.sdv = context.reader().getSortedDocValues(field);
    return this.sdv != null;
  }

  /** Positions the doc values on {@code doc} and reports whether that document has a value. */
  @Override
  protected final boolean hasValue(int doc) throws IOException {
    final boolean present = sdv.advanceExact(doc);
    return present;
  }

  /**
   * Compares the current document's value against the stored extremes and copies it into {@code max} and/or
   * {@code min} when it exceeds them. A copy is stored rather than the {@code BytesRef} handed out by the doc values.
   */
  @Override
  protected void doAccumulate(int count) throws IOException {
    final BytesRef current = sdv.binaryValue();

    final boolean isNewMax = max == null || current.compareTo(max) > 0;
    if (isNewMax) {
      max = copyFrom(current, max);
    }

    final boolean isNewMin = min == null || current.compareTo(min) < 0;
    if (isNewMin) {
      min = copyFrom(current, min);
    }
  }
}
/** Statistics (min and max term) for a sorted-set DocValues field. */
public static class SortedSetDocValuesStats extends DocValuesStats<BytesRef> {

  /** Doc values of the current segment, assigned by {@link #init(LeafReaderContext)}. */
  protected SortedSetDocValues ssdv;

  protected SortedSetDocValuesStats(String field) {
    super(field, null, null);
  }

  /** Fetches the field's sorted-set doc values from the segment reader; {@code false} if there are none. */
  @Override
  protected final boolean init(LeafReaderContext context) throws IOException {
    this.ssdv = context.reader().getSortedSetDocValues(field);
    return this.ssdv != null;
  }

  /** Positions the doc values on {@code doc} and reports whether that document has any value. */
  @Override
  protected final boolean hasValue(int doc) throws IOException {
    final boolean present = ssdv.advanceExact(doc);
    return present;
  }

  /**
   * Walks all ordinals of the current document, resolves each to its term and copies the term into {@code max}
   * and/or {@code min} when it exceeds the stored extremes.
   */
  @Override
  protected void doAccumulate(int count) throws IOException {
    for (long ord = ssdv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ssdv.nextOrd()) {
      final BytesRef term = ssdv.lookupOrd(ord);

      final boolean isNewMax = max == null || term.compareTo(max) > 0;
      if (isNewMax) {
        max = copyFrom(term, max);
      }

      final boolean isNewMin = min == null || term.compareTo(min) < 0;
      if (isNewMin) {
        min = copyFrom(term, min);
      }
    }
  }
}
}