blob: d334ffc6d5996b120537937a6c31ca8afd42a789 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.Collections;
import java.util.Date;
import java.util.Map;
import java.util.HashMap;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.*;
/**
 * Factory for {@link org.apache.solr.handler.component.StatsValues} instances. The concrete implementation is
 * selected from the {@link FieldType} of the field whose statistics are being collected.
 */
public class StatsValuesFactory {

  /**
   * Creates the StatsValues implementation that matches the type of the given field.
   *
   * Numeric field types are checked first, then date fields, then string fields.
   *
   * @param sf SchemaField for the field whose statistics will be created by the resulting StatsValues
   * @return Instance of StatsValues that will create statistics from values from a field of the given type
   * @throws SolrException with {@code BAD_REQUEST} if the field type is none of the supported types
   */
  public static StatsValues createStatsValues(SchemaField sf) {
    // TODO: allow for custom field types
    final FieldType type = sf.getType();

    if (isNumericType(type)) {
      return new NumericStatsValues(sf);
    }
    if (type instanceof DateField) {
      return new DateStatsValues(sf);
    }
    if (type instanceof StrField) {
      return new StringStatsValues(sf);
    }
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + type + " is not currently supported");
  }

  /**
   * Tells whether the given field type is one of the types handled by {@link NumericStatsValues}.
   *
   * @param type field type to test, may be null
   * @return true if the type is one of the recognised numeric field types
   */
  private static boolean isNumericType(FieldType type) {
    return type instanceof DoubleField
        || type instanceof IntField
        || type instanceof LongField
        || type instanceof FloatField
        || type instanceof TrieField
        || type instanceof SortableDoubleField
        || type instanceof SortableIntField
        || type instanceof SortableLongField
        || type instanceof SortableFloatField;
  }
}
/**
* Abstract implementation of {@link org.apache.solr.handler.component.StatsValues} that provides the default behavior
* for most StatsValues implementations.
*
* There are very few requirements placed on what statistics concrete implementations should collect, with the only required
* statistics being the minimum and maximum values.
*/
abstract class AbstractStatsValues<T> implements StatsValues {
private static final String FACETS = "facets";
final protected SchemaField sf;
final protected FieldType ft;
protected T max;
protected T min;
protected long missing;
protected long count;
private ValueSource valueSource;
protected FunctionValues values;
// facetField facetValue
protected Map<String, Map<String, StatsValues>> facets = new HashMap<String, Map<String, StatsValues>>();
protected AbstractStatsValues(SchemaField sf) {
this.sf = sf;
this.ft = sf.getType();
}
/**
* {@inheritDoc}
*/
@Override
public void accumulate(NamedList stv) {
count += (Long) stv.get("count");
missing += (Long) stv.get("missing");
updateMinMax((T) stv.get("min"), (T) stv.get("max"));
updateTypeSpecificStats(stv);
NamedList f = (NamedList) stv.get(FACETS);
if (f == null) {
return;
}
for (int i = 0; i < f.size(); i++) {
String field = f.getName(i);
NamedList vals = (NamedList) f.getVal(i);
Map<String, StatsValues> addTo = facets.get(field);
if (addTo == null) {
addTo = new HashMap<String, StatsValues>();
facets.put(field, addTo);
}
for (int j = 0; j < vals.size(); j++) {
String val = vals.getName(j);
StatsValues vvals = addTo.get(val);
if (vvals == null) {
vvals = StatsValuesFactory.createStatsValues(sf);
addTo.put(val, vvals);
}
vvals.accumulate((NamedList) vals.getVal(j));
}
}
}
/**
* {@inheritDoc}
*/
@Override
public void accumulate(BytesRef value, int count) {
T typedValue = (T)ft.toObject(sf, value);
accumulate(typedValue, count);
}
public void accumulate(T value, int count) {
this.count += count;
updateMinMax(value, value);
updateTypeSpecificStats(value, count);
}
/**
* {@inheritDoc}
*/
@Override
public void missing() {
missing++;
}
/**
* {@inheritDoc}
*/
@Override
public void addMissing(int count) {
missing += count;
}
/**
* {@inheritDoc}
*/
@Override
public void addFacet(String facetName, Map<String, StatsValues> facetValues) {
facets.put(facetName, facetValues);
}
/**
* {@inheritDoc}
*/
@Override
public NamedList<?> getStatsValues() {
NamedList<Object> res = new SimpleOrderedMap<Object>();
res.add("min", min);
res.add("max", max);
res.add("count", count);
res.add("missing", missing);
addTypeSpecificStats(res);
// add the facet stats
NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
for (Map.Entry<String, Map<String, StatsValues>> entry : facets.entrySet()) {
NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
nl.add(entry.getKey(), nl2);
for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
nl2.add(e2.getKey(), e2.getValue().getStatsValues());
}
}
res.add(FACETS, nl);
return res;
}
public void setNextReader(AtomicReaderContext ctx) throws IOException {
if (valueSource == null) {
valueSource = ft.getValueSource(sf, null);
}
values = valueSource.getValues(Collections.emptyMap(), ctx);
}
/**
* Updates the minimum and maximum statistics based on the given values
*
* @param min Value that the current minimum should be updated against
* @param max Value that the current maximum should be updated against
*/
protected abstract void updateMinMax(T min, T max);
/**
* Updates the type specific statistics based on the given value
*
* @param value Value the statistics should be updated against
* @param count Number of times the value is being accumulated
*/
protected abstract void updateTypeSpecificStats(T value, int count);
/**
* Updates the type specific statistics based on the values in the given list
*
* @param stv List containing values the current statistics should be updated against
*/
protected abstract void updateTypeSpecificStats(NamedList stv);
/**
* Add any type specific statistics to the given NamedList
*
* @param res NamedList to add the type specific statistics too
*/
protected abstract void addTypeSpecificStats(NamedList<Object> res);
}
/**
 * Implementation of StatsValues for numeric fields. Values are handled as {@link Number} and all arithmetic is
 * carried out on their double representation.
 */
class NumericStatsValues extends AbstractStatsValues<Number> {
  double sum;
  double sumOfSquares;

  /**
   * @param sf numeric field whose values are summarised
   */
  public NumericStatsValues(SchemaField sf) {
    super(sf);
    // start from the identity elements of min/max so the first value always replaces them
    min = Double.POSITIVE_INFINITY;
    max = Double.NEGATIVE_INFINITY;
  }

  /**
   * Accumulates the value of the given document, or records it as missing when the document has no value.
   *
   * @param docID document whose value should be accumulated
   */
  @Override
  public void accumulate(int docID) {
    if (!values.exists(docID)) {
      missing();
      return;
    }
    accumulate((Number) values.objectVal(docID), 1);
  }

  /**
   * Adds the {@code sum} and {@code sumOfSquares} entries of the given list to the running totals.
   *
   * @param stv list containing the statistics to merge
   */
  @Override
  public void updateTypeSpecificStats(NamedList stv) {
    Number otherSum = (Number) stv.get("sum");
    sum += otherSum.doubleValue();
    Number otherSumOfSquares = (Number) stv.get("sumOfSquares");
    sumOfSquares += otherSumOfSquares.doubleValue();
  }

  /**
   * Adds the given value, weighted by its count, to the running sum and sum of squares.
   *
   * @param v value to add
   * @param count number of times the value is being accumulated
   */
  @Override
  public void updateTypeSpecificStats(Number v, int count) {
    final double value = v.doubleValue();
    sum += value * count;
    sumOfSquares += (value * value * count); // for std deviation
  }

  /**
   * Folds the given bounds into the current minimum and maximum, comparing as doubles.
   *
   * @param min candidate for the new minimum
   * @param max candidate for the new maximum
   */
  @Override
  protected void updateMinMax(Number min, Number max) {
    final double currentMin = this.min.doubleValue();
    this.min = Math.min(currentMin, min.doubleValue());
    final double currentMax = this.max.doubleValue();
    this.max = Math.max(currentMax, max.doubleValue());
  }

  /**
   * Adds sum, sumOfSquares, mean and standard deviation statistics to the given NamedList
   *
   * @param res NamedList to add the type specific statistics to
   */
  @Override
  protected void addTypeSpecificStats(NamedList<Object> res) {
    final double mean = sum / count;
    res.add("sum", sum);
    res.add("sumOfSquares", sumOfSquares);
    res.add("mean", mean);
    res.add("stddev", getStandardDeviation());
  }

  /**
   * Calculates the standard deviation from the count, sum and sum of squares.
   *
   * @return the standard deviation, or 0 when at most one value has been accumulated
   */
  private double getStandardDeviation() {
    if (count <= 1) {
      return 0.0D;
    }
    final double numerator = (count * sumOfSquares) - (sum * sum);
    final double denominator = count * (count - 1.0D);
    return Math.sqrt(numerator / denominator);
  }
}
/**
 * Implementation of StatsValues that supports Date values.
 *
 * The sum is kept as milliseconds (the values of {@link Date#getTime()}), the sum of squares as a double because
 * the square of a millisecond timestamp does not fit in a long.
 */
class DateStatsValues extends AbstractStatsValues<Date> {
  // Running total of Date.getTime() values; starts at 0 so reported sum and mean are not shifted by a millisecond.
  private long sum = 0;
  double sumOfSquares = 0;

  /**
   * @param sf date field whose values are summarised
   */
  public DateStatsValues(SchemaField sf) {
    super(sf);
  }

  /**
   * Accumulates the value of the given document, or records it as missing when the document has no value.
   *
   * @param docID document whose value should be accumulated
   */
  @Override
  public void accumulate(int docID) {
    if (values.exists(docID)) {
      accumulate((Date) values.objectVal(docID), 1);
    } else {
      missing();
    }
  }

  /**
   * Adds the {@code sum} and {@code sumOfSquares} entries of the given list to the running totals.
   *
   * Both entries are skipped when {@code sum} is absent: {@link #addTypeSpecificStats(NamedList)} emits neither of
   * them when there is nothing meaningful to report, so a merged list may legitimately lack them.
   *
   * @param stv list containing the statistics to merge
   */
  @Override
  protected void updateTypeSpecificStats(NamedList stv) {
    Date otherSum = (Date) stv.get("sum");
    if (otherSum == null) {
      return;
    }
    sum += otherSum.getTime();
    sumOfSquares += ((Number) stv.get("sumOfSquares")).doubleValue();
  }

  /**
   * Adds the given date, weighted by its count, to the running sum and sum of squares.
   *
   * @param v date to add
   * @param count number of times the date is being accumulated
   */
  @Override
  public void updateTypeSpecificStats(Date v, int count) {
    long value = v.getTime();
    // square in double arithmetic: value * value overflows a long for any realistic timestamp
    sumOfSquares += ((double) value * value * count); // for std deviation
    sum += value * count;
  }

  /**
   * Folds the given bounds into the current minimum and maximum. Null bounds (a merged result that contained no
   * values) are ignored.
   *
   * @param min candidate for the new minimum, may be null
   * @param max candidate for the new maximum, may be null
   */
  @Override
  protected void updateMinMax(Date min, Date max) {
    if (min != null && (this.min == null || this.min.after(min))) {
      this.min = min;
    }
    // the maximum must be compared against the incoming maximum, not the incoming minimum
    if (max != null && (this.max == null || this.max.before(max))) {
      this.max = max;
    }
  }

  /**
   * Adds sum, mean, sumOfSquares and standard deviation statistics to the given NamedList. Nothing is added when
   * the sum is not positive.
   *
   * @param res NamedList to add the type specific statistics to
   */
  @Override
  protected void addTypeSpecificStats(NamedList<Object> res) {
    if (sum <= 0) {
      return; // date==0 is meaningless
    }
    res.add("sum", new Date(sum));
    if (count > 0) {
      res.add("mean", new Date(sum / count));
    }
    res.add("sumOfSquares", sumOfSquares);
    res.add("stddev", getStandardDeviation());
  }

  /**
   * Calculates the standard deviation. For dates, this is really the MS deviation
   *
   * @return the standard deviation, or 0 when at most one value has been accumulated
   */
  private double getStandardDeviation() {
    if (count <= 1) {
      return 0.0D;
    }
    // sum * sum is evaluated as double; as a long product it would overflow
    return Math.sqrt(((count * sumOfSquares) - (sum * (double) sum)) / (count * (count - 1.0D)));
  }
}
/**
 * Implementation of StatsValues for string fields. Only the common statistics (min, max, count, missing) are
 * collected; minimum and maximum follow {@link String#compareTo(String)}.
 */
class StringStatsValues extends AbstractStatsValues<String> {

  /**
   * @param sf string field whose values are summarised
   */
  public StringStatsValues(SchemaField sf) {
    super(sf);
  }

  /**
   * Accumulates the string value of the given document, or records it as missing when the document has no value.
   *
   * @param docID document whose value should be accumulated
   */
  @Override
  public void accumulate(int docID) {
    if (!values.exists(docID)) {
      missing();
      return;
    }
    accumulate(values.strVal(docID), 1);
  }

  /**
   * Does nothing: strings have no type specific statistics to merge.
   */
  @Override
  protected void updateTypeSpecificStats(NamedList stv) {
    // No type specific stats
  }

  /**
   * Does nothing: strings have no type specific statistics to update.
   */
  @Override
  protected void updateTypeSpecificStats(String value, int count) {
    // No type specific stats
  }

  /**
   * Folds the given bounds into the current minimum and maximum. A null bound leaves the corresponding statistic
   * unchanged.
   *
   * @param min candidate for the new minimum, may be null
   * @param max candidate for the new maximum, may be null
   */
  @Override
  protected void updateMinMax(String min, String max) {
    this.min = min(this.min, min);
    this.max = max(this.max, max);
  }

  /**
   * Adds no type specific statistics
   */
  @Override
  protected void addTypeSpecificStats(NamedList<Object> res) {
    // Add no statistics
  }

  /**
   * Determines which of the given Strings is the maximum, as computed by {@link String#compareTo(String)}
   *
   * @param str1 first candidate, may be null
   * @param str2 second candidate, may be null
   * @return the other argument if one is null; otherwise str1 if it is strictly greater than str2, str2 otherwise
   */
  private static String max(String str1, String str2) {
    if (str1 == null) {
      return str2;
    }
    if (str2 == null) {
      return str1;
    }
    if (str1.compareTo(str2) > 0) {
      return str1;
    }
    return str2;
  }

  /**
   * Determines which of the given Strings is the minimum, as computed by {@link String#compareTo(String)}
   *
   * @param str1 first candidate, may be null
   * @param str2 second candidate, may be null
   * @return the other argument if one is null; otherwise str1 if it is strictly less than str2, str2 otherwise
   */
  private static String min(String str1, String str2) {
    if (str1 == null) {
      return str2;
    }
    if (str2 == null) {
      return str1;
    }
    if (str1.compareTo(str2) < 0) {
      return str1;
    }
    return str2;
  }
}