| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search; |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedNumericDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** Holds statistics for a DocValues field. */ |
| public abstract class DocValuesStats<T> { |
| |
/** Number of accumulated documents that had no value for the field. */
private int missing;

/** Number of accumulated documents that had a value for the field. */
private int count;

/** Name of the DocValues field these statistics describe. */
protected final String field;

/** Smallest value recorded so far; holds the constructor's {@code initialMin} until a subclass updates it. */
protected T min;

/** Largest value recorded so far; holds the constructor's {@code initialMax} until a subclass updates it. */
protected T max;

/**
 * Creates a statistics holder with both document counters at zero.
 *
 * @param field the name of the DocValues field
 * @param initialMin the starting value of {@link #min}
 * @param initialMax the starting value of {@link #max}
 */
protected DocValuesStats(String field, T initialMin, T initialMax) {
  this.field = field;
  min = initialMin;
  max = initialMax;
}
| |
| /** |
| * Called after {@link #accumulate(int)} was processed and verified that the document has a value for |
| * the field. Implementations should update the statistics based on the value of the current document. |
| * |
| * @param count |
| * the updated number of documents with value for this field. |
| */ |
| protected abstract void doAccumulate(int count) throws IOException; |
| |
| /** |
| * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e. |
| * it does have the requested DocValues field). |
| */ |
| protected abstract boolean init(LeafReaderContext context) throws IOException; |
| |
| /** Returns whether the given document has a value for the requested DocValues field. */ |
| protected abstract boolean hasValue(int doc) throws IOException; |
| |
| final void accumulate(int doc) throws IOException { |
| if (hasValue(doc)) { |
| ++count; |
| doAccumulate(count); |
| } else { |
| ++missing; |
| } |
| } |
| |
| final void addMissing() { |
| ++missing; |
| } |
| |
| /** The field for which these stats were computed. */ |
| public final String field() { |
| return field; |
| } |
| |
| /** The number of documents which have a value of the field. */ |
| public final int count() { |
| return count; |
| } |
| |
| /** The number of documents which do not have a value of the field. */ |
| public final int missing() { |
| return missing; |
| } |
| |
| /** The minimum value of the field. Undefined when {@link #count()} is zero. */ |
| public final T min() { |
| return min; |
| } |
| |
| /** The maximum value of the field. Undefined when {@link #count()} is zero. */ |
| public final T max() { |
| return max; |
| } |
| |
| /** Holds statistics for a numeric DocValues field. */ |
| public static abstract class NumericDocValuesStats<T extends Number> extends DocValuesStats<T> { |
| |
| protected double mean = 0.0; |
| protected double variance = 0.0; |
| |
| protected NumericDocValues ndv; |
| |
| protected NumericDocValuesStats(String field, T initialMin, T initialMax) { |
| super(field, initialMin, initialMax); |
| } |
| |
| @Override |
| protected final boolean init(LeafReaderContext context) throws IOException { |
| ndv = context.reader().getNumericDocValues(field); |
| return ndv != null; |
| } |
| |
| @Override |
| protected final boolean hasValue(int doc) throws IOException { |
| return ndv.advanceExact(doc); |
| } |
| |
| /** The mean of all values of the field. */ |
| public final double mean() { |
| return mean; |
| } |
| |
| /** Returns the variance of all values of the field. */ |
| public final double variance() { |
| int count = count(); |
| return count > 0 ? variance / count : 0; |
| } |
| |
| /** Returns the stdev of all values of the field. */ |
| public final double stdev() { |
| return Math.sqrt(variance()); |
| } |
| |
| /** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */ |
| public abstract T sum(); |
| } |
| |
| /** Holds DocValues statistics for a numeric field storing {@code long} values. */ |
| public static final class LongDocValuesStats extends NumericDocValuesStats<Long> { |
| |
| // To avoid boxing 'long' to 'Long' while the sum is computed, declare it as private variable. |
| private long sum = 0; |
| |
| public LongDocValuesStats(String field) { |
| super(field, Long.MAX_VALUE, Long.MIN_VALUE); |
| } |
| |
| @Override |
| protected void doAccumulate(int count) throws IOException { |
| long val = ndv.longValue(); |
| if (val > max) { |
| max = val; |
| } |
| if (val < min) { |
| min = val; |
| } |
| sum += val; |
| double oldMean = mean; |
| mean += (val - mean) / count; |
| variance += (val - mean) * (val - oldMean); |
| } |
| |
| @Override |
| public Long sum() { |
| return sum; |
| } |
| } |
| |
| /** Holds DocValues statistics for a numeric field storing {@code double} values. */ |
| public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> { |
| |
| // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable. |
| private double sum = 0; |
| |
| public DoubleDocValuesStats(String field) { |
| super(field, Double.MAX_VALUE, Double.MIN_VALUE); |
| } |
| |
| @Override |
| protected void doAccumulate(int count) throws IOException { |
| double val = Double.longBitsToDouble(ndv.longValue()); |
| if (Double.compare(val, max) > 0) { |
| max = val; |
| } |
| if (Double.compare(val, min) < 0) { |
| min = val; |
| } |
| sum += val; |
| double oldMean = mean; |
| mean += (val - mean) / count; |
| variance += (val - mean) * (val - oldMean); |
| } |
| |
| @Override |
| public Double sum() { |
| return sum; |
| } |
| } |
| |
| /** Holds statistics for a sorted-numeric DocValues field. */ |
| public static abstract class SortedNumericDocValuesStats<T extends Number> extends DocValuesStats<T> { |
| |
| protected long valuesCount = 0; |
| protected double mean = 0.0; |
| protected double variance = 0.0; |
| |
| protected SortedNumericDocValues sndv; |
| |
| protected SortedNumericDocValuesStats(String field, T initialMin, T initialMax) { |
| super(field, initialMin, initialMax); |
| } |
| |
| @Override |
| protected final boolean init(LeafReaderContext context) throws IOException { |
| sndv = context.reader().getSortedNumericDocValues(field); |
| return sndv != null; |
| } |
| |
| @Override |
| protected final boolean hasValue(int doc) throws IOException { |
| return sndv.advanceExact(doc); |
| } |
| |
| /** The mean of all values of the field. */ |
| public final double mean() { |
| return mean; |
| } |
| |
| /** Returns the variance of all values of the field. */ |
| public final double variance() { |
| int count = count(); |
| return count > 0 ? variance / count : 0; |
| } |
| |
| /** Returns the stdev of all values of the field. */ |
| public final double stdev() { |
| return Math.sqrt(variance()); |
| } |
| |
| /** Returns the total number of values for this field. */ |
| public final long valuesCount() { |
| return valuesCount; |
| } |
| |
| /** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */ |
| public abstract T sum(); |
| } |
| |
| /** Holds DocValues statistics for a sorted-numeric field storing {@code long} values. */ |
| public static final class SortedLongDocValuesStats extends SortedNumericDocValuesStats<Long> { |
| |
| // To avoid boxing 'long' to 'Long' while the sum is computed, declare it as private variable. |
| private long sum = 0; |
| |
| public SortedLongDocValuesStats(String field) { |
| super(field, Long.MAX_VALUE, Long.MIN_VALUE); |
| } |
| |
| @Override |
| protected void doAccumulate(int count) throws IOException { |
| int numValues = sndv.docValueCount(); |
| while (numValues-- > 0) { |
| long val = sndv.nextValue(); |
| if (val > max) { |
| max = val; |
| } |
| if (val < min) { |
| min = val; |
| } |
| sum += val; |
| double oldMean = mean; |
| // for correct "running average computation", increase valuesCount with each value, rather than once before the |
| // loop stats. |
| ++valuesCount; |
| mean += (val - mean) / valuesCount; |
| variance += (val - mean) * (val - oldMean); |
| } |
| } |
| |
| @Override |
| public Long sum() { |
| return sum; |
| } |
| } |
| |
| /** Holds DocValues statistics for a sorted-numeric field storing {@code double} values. */ |
| public static final class SortedDoubleDocValuesStats extends SortedNumericDocValuesStats<Double> { |
| |
| // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable. |
| private double sum = 0; |
| |
| public SortedDoubleDocValuesStats(String field) { |
| super(field, Double.MAX_VALUE, Double.MIN_VALUE); |
| } |
| |
| @Override |
| protected void doAccumulate(int count) throws IOException { |
| int numValues = sndv.docValueCount(); |
| while (numValues-- > 0) { |
| double val = Double.longBitsToDouble(sndv.nextValue()); |
| if (Double.compare(val, max) > 0) { |
| max = val; |
| } |
| if (Double.compare(val, min) < 0) { |
| min = val; |
| } |
| sum += val; |
| double oldMean = mean; |
| // for correct "running average computation", increase valuesCount with each value, rather than once before the |
| // loop stats. |
| ++valuesCount; |
| mean += (val - mean) / valuesCount; |
| variance += (val - mean) * (val - oldMean); |
| } |
| } |
| |
| @Override |
| public Double sum() { |
| return sum; |
| } |
| } |
| |
| private static BytesRef copyFrom(BytesRef src, BytesRef dest) { |
| if (dest == null) { |
| return BytesRef.deepCopyOf(src); |
| } |
| |
| dest.bytes = ArrayUtil.grow(dest.bytes, src.length); |
| System.arraycopy(src.bytes, src.offset, dest.bytes, 0, src.length); |
| dest.offset = 0; |
| dest.length = src.length; |
| return dest; |
| } |
| |
| /** Holds statistics for a sorted DocValues field. */ |
| public static class SortedDocValuesStats extends DocValuesStats<BytesRef> { |
| |
| protected SortedDocValues sdv; |
| |
| protected SortedDocValuesStats(String field) { |
| super(field, null, null); |
| } |
| |
| @Override |
| protected final boolean init(LeafReaderContext context) throws IOException { |
| sdv = context.reader().getSortedDocValues(field); |
| return sdv != null; |
| } |
| |
| @Override |
| protected final boolean hasValue(int doc) throws IOException { |
| return sdv.advanceExact(doc); |
| } |
| |
| @Override |
| protected void doAccumulate(int count) throws IOException { |
| BytesRef val = sdv.binaryValue(); |
| if (max == null || val.compareTo(max) > 0) { |
| max = copyFrom(val, max); |
| } |
| if (min == null || val.compareTo(min) < 0) { |
| min = copyFrom(val, min); |
| } |
| } |
| } |
| |
| /** Holds statistics for a sorted-set DocValues field. */ |
| public static class SortedSetDocValuesStats extends DocValuesStats<BytesRef> { |
| |
| protected SortedSetDocValues ssdv; |
| |
| protected SortedSetDocValuesStats(String field) { |
| super(field, null, null); |
| } |
| |
| @Override |
| protected final boolean init(LeafReaderContext context) throws IOException { |
| ssdv = context.reader().getSortedSetDocValues(field); |
| return ssdv != null; |
| } |
| |
| @Override |
| protected final boolean hasValue(int doc) throws IOException { |
| return ssdv.advanceExact(doc); |
| } |
| |
| @Override |
| protected void doAccumulate(int count) throws IOException { |
| long ord; |
| while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { |
| BytesRef val = ssdv.lookupOrd(ord); |
| if (max == null || val.compareTo(max) > 0) { |
| max = copyFrom(val, max); |
| } |
| if (min == null || val.compareTo(min) < 0) { |
| min = copyFrom(val, min); |
| } |
| } |
| } |
| } |
| |
| } |