blob: e89cfd86b931e0a353c6f07456ac2cb9684793be [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.function.IntFunction;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
/**
* Accumulates statistics separated by a slot number.
* There is a separate statistic per slot. The slot is usually an ordinal into a set of values, e.g. tracking a count
* frequency <em>per term</em>.
* Sometimes there doesn't need to be a slot distinction, in which case there is just one nominal slot.
*/
public abstract class SlotAcc implements Closeable {
String key; // todo...
protected final FacetContext fcontext;
protected LeafReaderContext currentReaderContext;
protected int currentDocBase;
public SlotAcc(FacetContext fcontext) {
this.fcontext = fcontext;
}
@Override public String toString() { return key; }
/**
* NOTE: this currently detects when it is being reused and calls resetIterators by comparing reader ords
* with previous calls to setNextReader. For this reason, current users must call setNextReader
* in segment order. Failure to do so will cause worse performance.
*/
public void setNextReader(LeafReaderContext readerContext) throws IOException {
LeafReaderContext lastReaderContext = currentReaderContext;
currentReaderContext = readerContext;
currentDocBase = currentReaderContext.docBase;
if (lastReaderContext == null || lastReaderContext.ord >= currentReaderContext.ord) {
// if we went backwards (or reused) a reader, we need to reset iterators that can be used only once.
resetIterators();
}
}
/**
* All subclasses must override this method to collect documents. This method is called by the
* default impl of {@link #collect(DocSet, int, IntFunction)} but it's also neccessary if this accumulator
* is used for sorting.
*
* @param doc Single Segment docId (relative to the current {@link LeafReaderContext} to collect
* @param slot The slot number to collect this document in
* @param slotContext A callback that can be used for Accumulators that would like additional info
* about the current slot -- the {@link IntFunction} is only garunteed to be valid for
* the current slot, and the {@link SlotContext} returned is only valid for the duration
* of the <code>collect()</code> call.
*/
public abstract void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException;
/**
* Bulk collection of all documents in a slot. The default implementation calls {@link #collect(int, int, IntFunction)}
*
* @param docs (global) Documents to collect
* @param slot The slot number to collect these documents in
* @param slotContext A callback that can be used for Accumulators that would like additional info
* about the current slot -- the {@link IntFunction} is only garunteed to be valid for
* the current slot, and the {@link SlotContext} returned is only valid for the duration
* of the <code>collect()</code> call.
*/
public int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
int count = 0;
SolrIndexSearcher searcher = fcontext.searcher;
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
int segBase = 0;
int segMax;
int adjustedMax = 0;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (doc >= adjustedMax) {
do {
ctx = ctxIt.next();
if (ctx == null) {
// should be impossible
throw new RuntimeException("INTERNAL FACET ERROR");
}
segBase = ctx.docBase;
segMax = ctx.reader().maxDoc();
adjustedMax = segBase + segMax;
} while (doc >= adjustedMax);
assert doc >= ctx.docBase;
setNextReader(ctx);
}
count++;
collect(doc - segBase, slot, slotContext); // per-seg collectors
}
return count;
}
public abstract int compare(int slotA, int slotB);
public abstract Object getValue(int slotNum) throws IOException;
public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
if (key == null) return;
Object val = getValue(slotNum);
if (val != null) {
bucket.add(key, val);
}
}
/**
* Called to reset the acc to a fresh state, ready for reuse
*/
public abstract void reset() throws IOException;
/**
* Typically called from setNextReader to reset docValue iterators
*/
protected void resetIterators() throws IOException {
}
;
public abstract void resize(Resizer resizer);
@Override
public void close() throws IOException {
}
public static abstract class Resizer {
public abstract int getNewSize();
public abstract int getNewSlot(int oldSlot);
public double[] resize(double[] old, double defaultValue) {
double[] values = new double[getNewSize()];
if (defaultValue != 0) {
Arrays.fill(values, 0, values.length, defaultValue);
}
for (int i = 0; i < old.length; i++) {
double val = old[i];
if (val != defaultValue) {
int newSlot = getNewSlot(i);
if (newSlot >= 0) {
values[newSlot] = val;
}
}
}
return values;
}
public int[] resize(int[] old, int defaultValue) {
int[] values = new int[getNewSize()];
if (defaultValue != 0) {
Arrays.fill(values, 0, values.length, defaultValue);
}
for (int i = 0; i < old.length; i++) {
int val = old[i];
if (val != defaultValue) {
int newSlot = getNewSlot(i);
if (newSlot >= 0) {
values[newSlot] = val;
}
}
}
return values;
}
public long[] resize(long[] old, long defaultValue) {
long[] values = new long[getNewSize()];
if (defaultValue != 0) {
Arrays.fill(values, 0, values.length, defaultValue);
}
for (int i = 0; i < old.length; i++) {
long val = old[i];
if (val != defaultValue) {
int newSlot = getNewSlot(i);
if (newSlot >= 0) {
values[newSlot] = val;
}
}
}
return values;
}
public FixedBitSet resize(FixedBitSet old) {
FixedBitSet values = new FixedBitSet(getNewSize());
int oldSize = old.length();
for (int oldSlot = 0; ; ) {
oldSlot = values.nextSetBit(oldSlot);
if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break;
int newSlot = getNewSlot(oldSlot);
values.set(newSlot);
if (++oldSlot >= oldSize) break;
}
return values;
}
public <T> T[] resize(T[] old, T defaultValue) {
@SuppressWarnings({"unchecked"})
T[] values = (T[]) Array.newInstance(old.getClass().getComponentType(), getNewSize());
if (defaultValue != null) {
Arrays.fill(values, 0, values.length, defaultValue);
}
for (int i = 0; i < old.length; i++) {
T val = old[i];
if (val != defaultValue) {
int newSlot = getNewSlot(i);
if (newSlot >= 0) {
values[newSlot] = val;
}
}
}
return values;
}
} // end class Resizer
/**
* Incapsulates information about the current slot, for Accumulators that may want
* additional info during collection.
*/
public static class SlotContext {
private final Query slotQuery;
public SlotContext(Query slotQuery) {
this.slotQuery = slotQuery;
}
/**
* behavior of this method is undefined if {@link #isAllBuckets} returns <code>true</code>
*/
public Query getSlotQuery() {
return slotQuery;
}
/**
* @return true if and only if this slot corrisponds to the <code>allBuckets</code> bucket.
* @see #getSlotQuery
*/
public boolean isAllBuckets() {
return false;
}
}
// TODO: we should really have a decoupled value provider...
// This would enhance reuse and also prevent multiple lookups of same value across diff stats
public abstract static class FuncSlotAcc extends SlotAcc {
protected final ValueSource valueSource;
protected FunctionValues values;
public FuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(fcontext);
this.valueSource = values;
}
@Override
@SuppressWarnings({"unchecked"})
public void setNextReader(LeafReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
values = valueSource.getValues(fcontext.qcontext, readerContext);
}
}
// have a version that counts the number of times a Slot has been hit? (for avg... what else?)
// TODO: make more sense to have func as the base class rather than double?
// double-slot-func -> func-slot -> slot -> acc
// double-slot-func -> double-slot -> slot -> acc
public abstract static class DoubleFuncSlotAcc extends FuncSlotAcc {
protected double[] result; // TODO: use DoubleArray
protected double initialValue;
public DoubleFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
this(values, fcontext, numSlots, 0);
}
public DoubleFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots, double initialValue) {
super(values, fcontext, numSlots);
this.initialValue = initialValue;
result = new double[numSlots];
if (initialValue != 0) {
reset();
}
}
@Override
public int compare(int slotA, int slotB) {
return Double.compare(result[slotA], result[slotB]);
}
@Override
public Object getValue(int slot) {
return result[slot];
}
@Override
public void reset() {
Arrays.fill(result, initialValue);
}
@Override
public void resize(Resizer resizer) {
result = resizer.resize(result, initialValue);
}
}
public abstract static class LongFuncSlotAcc extends FuncSlotAcc {
protected long[] result;
protected long initialValue;
public LongFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots, long initialValue) {
super(values, fcontext, numSlots);
this.initialValue = initialValue;
result = new long[numSlots];
if (initialValue != 0) {
reset();
}
}
@Override
public int compare(int slotA, int slotB) {
return Long.compare(result[slotA], result[slotB]);
}
@Override
public Object getValue(int slot) {
return result[slot];
}
@Override
public void reset() {
Arrays.fill(result, initialValue);
}
@Override
public void resize(Resizer resizer) {
result = resizer.resize(result, initialValue);
}
}
public abstract static class IntSlotAcc extends SlotAcc {
protected int[] result; // use LongArray32
protected int initialValue;
public IntSlotAcc(FacetContext fcontext, int numSlots, int initialValue) {
super(fcontext);
this.initialValue = initialValue;
result = new int[numSlots];
if (initialValue != 0) {
reset();
}
}
@Override
public int compare(int slotA, int slotB) {
return Integer.compare(result[slotA], result[slotB]);
}
@Override
public Object getValue(int slot) {
return result[slot];
}
@Override
public void reset() {
Arrays.fill(result, initialValue);
}
@Override
public void resize(Resizer resizer) {
result = resizer.resize(result, initialValue);
}
}
static class SumSlotAcc extends DoubleFuncSlotAcc {
public SumSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots);
}
public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) throws IOException {
double val = values.doubleVal(doc); // todo: worth trying to share this value across multiple stats that need it?
result[slotNum] += val;
}
}
static class SumsqSlotAcc extends DoubleFuncSlotAcc {
public SumsqSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots);
}
@Override
public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) throws IOException {
double val = values.doubleVal(doc);
val = val * val;
result[slotNum] += val;
}
}
static class AvgSlotAcc extends DoubleFuncSlotAcc {
int[] counts;
public AvgSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots);
counts = new int[numSlots];
}
@Override
public void reset() {
super.reset();
for (int i = 0; i < counts.length; i++) {
counts[i] = 0;
}
}
@Override
public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) throws IOException {
double val = values.doubleVal(doc);
if (val != 0 || values.exists(doc)) {
result[slotNum] += val;
counts[slotNum] += 1;
}
}
private double avg(int slot) {
return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
}
@Override
public int compare(int slotA, int slotB) {
return Double.compare(avg(slotA), avg(slotB));
}
@Override
public Object getValue(int slot) {
if (fcontext.isShard()) {
ArrayList<Object> lst = new ArrayList<>(2);
lst.add(counts[slot]);
lst.add(result[slot]);
return lst;
} else {
return avg(slot);
}
}
@Override
public void resize(Resizer resizer) {
super.resize(resizer);
counts = resizer.resize(counts, 0);
}
}
static class VarianceSlotAcc extends DoubleFuncSlotAcc {
int[] counts;
double[] sum;
public VarianceSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots);
counts = new int[numSlots];
sum = new double[numSlots];
}
@Override
public void reset() {
super.reset();
Arrays.fill(counts, 0);
Arrays.fill(sum, 0);
}
@Override
public void resize(Resizer resizer) {
super.resize(resizer);
this.counts = resizer.resize(this.counts, 0);
this.sum = resizer.resize(this.sum, 0);
}
private double variance(int slot) {
return AggUtil.uncorrectedVariance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
@Override
public int compare(int slotA, int slotB) {
return Double.compare(this.variance(slotA), this.variance(slotB));
}
@Override
public Object getValue(int slot) {
if (fcontext.isShard()) {
ArrayList<Object> lst = new ArrayList<>(3);
lst.add(counts[slot]);
lst.add(result[slot]);
lst.add(sum[slot]);
return lst;
} else {
return this.variance(slot);
}
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
double val = values.doubleVal(doc);
if (values.exists(doc)) {
counts[slot]++;
result[slot] += val * val;
sum[slot] += val;
}
}
}
static class StddevSlotAcc extends DoubleFuncSlotAcc {
int[] counts;
double[] sum;
public StddevSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots);
counts = new int[numSlots];
sum = new double[numSlots];
}
@Override
public void reset() {
super.reset();
Arrays.fill(counts, 0);
Arrays.fill(sum, 0);
}
@Override
public void resize(Resizer resizer) {
super.resize(resizer);
this.counts = resizer.resize(this.counts, 0);
this.result = resizer.resize(this.result, 0);
}
private double stdDev(int slot) {
return AggUtil.uncorrectedStdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
@Override
public int compare(int slotA, int slotB) {
return Double.compare(this.stdDev(slotA), this.stdDev(slotB));
}
@Override
@SuppressWarnings({"unchecked"})
public Object getValue(int slot) {
if (fcontext.isShard()) {
ArrayList<Object> lst = new ArrayList<>(3);
lst.add(counts[slot]);
lst.add(result[slot]);
lst.add(sum[slot]);
return lst;
} else {
return this.stdDev(slot);
}
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
double val = values.doubleVal(doc);
if (values.exists(doc)) {
counts[slot]++;
result[slot] += val * val;
sum[slot] += val;
}
}
}
/**
* Implemented by some SlotAccs if they are capable of being used for
* sweep collecting in compatible facet processors
* @see FacetFieldProcessor#registerSweepingAccIfSupportedByCollectAcc()
*/
static interface SweepableSlotAcc<T extends SlotAcc> {
/**
* Called by processors if they support sweeping. Implementations will often
* return self or null (the latter indicating that all necessary collection will
* be covered by the "sweeping" data structures registered with the specified
* baseSweepingAcc as a result of the call to this method).
*
* If an implementing instance chooses to replace itself with another {@link SlotAcc}, it must
* call {@link SweepingCountSlotAcc#registerMapping(SlotAcc, SlotAcc)} on the specified
* baseSweepingAcc to notify it of the mapping from original SlotAcc to the SlotAcc that should
* be used for purposes of read access. It is the responsibility of the specified {@link SweepingCountSlotAcc}
* to ensure proper placement/accessibility of the SlotAcc to be used for read access.
*
* The replacement SlotAcc registered via {@link SweepingCountSlotAcc#registerMapping(SlotAcc, SlotAcc)}
* will be responsible for output via its {@link SlotAcc#setValues(SimpleOrderedMap, int)} method.
* An implementer of this method may register such a replacement, and also return a non-null
* SlotAcc to be used for normal collection (via {@link FacetFieldProcessor#collectAcc}). In this case,
* the implementer should take care that the returned {@link SlotAcc} is different from the {@link SlotAcc}
* registered for the purpose of output -- with the former overriding {@link SlotAcc#setValues(SimpleOrderedMap, int)}
* as a no-op, to prevent setting duplicate values.
*
* @param baseSweepingAcc - never null, where the SlotAcc may register domains for sweep collection,
* and must register mappings of new read-access SlotAccs that result from this call.
* @return SlotAcc to be used for purpose of collection. If null then collect methods will
* never be called on this SlotAcc.
*/
public T registerSweepingAccs(SweepingCountSlotAcc baseSweepingAcc);
}
/**
* A simple data structure to {@link DocSet} domains with an associated {@link CountSlotAcc}. This may be used
* to support sweep count accumulation over different {@link DocSet} domains, but the concept is perfectly applicable
* to encapsulating the relevant state for simple "non-sweep" collection as well (in which case {@link SweepCountAccStruct#docSet}
* would be {@link FacetContext#base}, {@link SweepCountAccStruct#countAcc} would be {@link FacetProcessor#countAcc}, and
* {@link SweepCountAccStruct#isBase} would trivially be "true").
*/
static final class SweepCountAccStruct {
final DocSet docSet;
final boolean isBase;
final CountSlotAcc countAcc;
public SweepCountAccStruct(DocSet docSet, boolean isBase, CountSlotAcc countAcc) {
this.docSet = docSet;
this.isBase = isBase;
this.countAcc = countAcc;
}
public SweepCountAccStruct(SweepCountAccStruct t, DocSet replaceDocSet) {
this.docSet = replaceDocSet;
this.isBase = t.isBase;
this.countAcc = t.countAcc;
}
/**
* Because sweep collection offloads "collect" methods to count accumulation code,
* it is helpful to provide a read-only view over the backing {@link CountSlotAcc}
*
* @return - a read-only view over {@link #countAcc}
*/
public ReadOnlyCountSlotAcc roCountAcc() {
return countAcc;
}
@Override public String toString() {
return this.countAcc.toString();
}
}
/**
* Special CountSlotAcc used by processors that support sweeping to decide what to sweep over and how to "collect"
* when doing the sweep.
*
* This class may be used by instances of {@link SweepableSlotAcc} to register DocSet domains (via {@link SweepingCountSlotAcc#add})
* over which to sweep-collect facet counts.
*
* @see SweepableSlotAcc#registerSweepingAccs
*/
static class SweepingCountSlotAcc extends CountSlotArrAcc {
static final String SWEEP_COLLECTION_DEBUG_KEY = "sweep_collection";
private final SimpleOrderedMap<Object> debug;
private final FacetFieldProcessor p;
final SweepCountAccStruct base;
final List<SweepCountAccStruct> others = new ArrayList<>();
private final List<SlotAcc> output = new ArrayList<>();
SweepingCountSlotAcc(int numSlots, FacetFieldProcessor p) {
super(p.fcontext, numSlots);
this.p = p;
this.base = new SweepCountAccStruct(fcontext.base, true, this);
final FacetDebugInfo fdebug = fcontext.getDebugInfo();
this.debug = null != fdebug ? new SimpleOrderedMap<>() : null;
if (null != this.debug) {
fdebug.putInfoItem(SWEEP_COLLECTION_DEBUG_KEY, debug);
debug.add("base", key);
debug.add("accs", new ArrayList<String>());
debug.add("mapped", new ArrayList<String>());
}
}
/**
* Called by SweepableSlotAccs to register new DocSet domains for sweep collection
*
* @param key
* assigned to the returned SlotAcc, and used for debugging
* @param docs
* the domain over which to sweep
* @param numSlots
* the number of slots
* @return a read-only representation of the count acc which is guaranteed to be populated after sweep count
* collection
*/
public ReadOnlyCountSlotAcc add(String key, DocSet docs, int numSlots) {
final CountSlotAcc count = new CountSlotArrAcc(fcontext, numSlots);
count.key = key;
final SweepCountAccStruct ret = new SweepCountAccStruct(docs, false, count);
if (null != debug) {
@SuppressWarnings("unchecked")
List<String> accsDebug = (List<String>) debug.get("accs");
accsDebug.add(ret.toString());
}
others.add(ret);
return ret.roCountAcc();
}
/**
* When a {@link SweepableSlotAcc} replaces itself (for the purpose of collection) with a different {@link SlotAcc}
* instance, it must register that replacement by calling this method with itself as the fromAcc param, and with the
* new replacement {@link SlotAcc} as the toAcc param. The two SlotAccs must have the same {@link SlotAcc#key}.
*
* It is the responsibility of this method to insure that {@link FacetFieldProcessor} references to fromAcc (other than
* those within {@link FacetFieldProcessor#collectAcc}, which are set directly by the return value of
* {@link SweepableSlotAcc#registerSweepingAccs(SweepingCountSlotAcc)}) are replaced
* by references to toAcc. Such references would include, e.g., {@link FacetFieldProcessor#sortAcc}.
*
* It is also this method's responsibility to insure that read access to toAcc (via toAcc's {@link SlotAcc#setValues(SimpleOrderedMap, int)}
* method) is provided via this instance's {@link #setValues(SimpleOrderedMap, int)} method.
*
* @param fromAcc - the {@link SlotAcc} to be replaced (this will normally be the caller of this method).
* @param toAcc - the replacement {@link SlotAcc}
*
* @see SweepableSlotAcc#registerSweepingAccs(SweepingCountSlotAcc)
*/
public void registerMapping(SlotAcc fromAcc, SlotAcc toAcc) {
assert fromAcc.key.equals(toAcc.key);
output.add(toAcc);
if (p.sortAcc == fromAcc) {
p.sortAcc = toAcc;
}
if (null != debug) {
@SuppressWarnings("unchecked")
List<String> mappedDebug = (List<String>) debug.get("mapped");
mappedDebug.add(fromAcc.toString());
}
}
/**
* Always populates the bucket with the current count for that slot. If the count is positive, or if
* <code>processEmpty==true</code>, then this method also populates the values from mapped "output" accumulators.
*
* @see #setSweepValues
*/
@Override
public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
super.setValues(bucket, slotNum);
if (0 < getCount(slotNum) || fcontext.processor.freq.processEmpty) {
setSweepValues(bucket, slotNum);
}
}
/**
* Populates the bucket with the values from all mapped "output" accumulators for the specified slot.
*
* This method exists because there are some contexts (namely SpecialSlotAcc, for allBuckets, etc.) in which "base"
* count is tracked differently, via getSpecialCount(). For such cases, we need a method that allows the caller to
* directly coordinate calling {@link SlotAcc#setValues} on the sweeping output accs, while avoiding the inclusion
* of {@link CountSlotAcc#setValues CountSlotAcc.setValues}
*/
public void setSweepValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
for (SlotAcc acc : output) {
acc.setValues(bucket, slotNum);
}
}
/**
* Helper method for code that wants to operating in a sweeping manner even if the current processor
* is not using sweeping.
*
* @returns struct that wraps the {@link FacetContext#base} unless the {@link FacetProcessor#countAcc} is a {@link SweepingCountSlotAcc}
*/
public static SweepCountAccStruct baseStructOf(FacetProcessor<?> processor) {
if (processor.countAcc instanceof SweepingCountSlotAcc) {
return ((SweepingCountSlotAcc) processor.countAcc).base;
}
return new SweepCountAccStruct(processor.fcontext.base, true, processor.countAcc);
}
/**
* Helper method for code that wants to operating in a sweeping manner even if the current processor
* is not using sweeping
*
* @returns empty list unless the {@link FacetProcessor#countAcc} is a {@link SweepingCountSlotAcc}
*/
public static List<SweepCountAccStruct> otherStructsOf(FacetProcessor<?> processor) {
if (processor.countAcc instanceof SweepingCountSlotAcc) {
return ((SweepingCountSlotAcc) processor.countAcc).others;
}
return Collections.emptyList();
}
}
abstract static class CountSlotAcc extends SlotAcc implements ReadOnlyCountSlotAcc {
public CountSlotAcc(FacetContext fcontext) {
super(fcontext);
// assume we are the 'count' by default unless/untill our creator overrides this
this.key = "count";
}
public abstract void incrementCount(int slot, int count);
public abstract int getCount(int slot);
}
/**
* This CountSlotAcc exists as a /dev/null sink for callers of collect(...) and other "write"-type
* methods. It should be used in contexts where "read"-type access methods will never be called.
*/
static final CountSlotAcc DEV_NULL_SLOT_ACC = new CountSlotAcc(null) {
@Override
public void resize(Resizer resizer) {
// No-op
}
@Override
public void reset() throws IOException {
// No-op
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
// No-op
}
@Override
public void incrementCount(int slot, int count) {
// No-op
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
// No-op
}
@Override
public int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
return docs.size(); // dressed up no-op
}
@Override
public Object getValue(int slotNum) throws IOException {
throw new UnsupportedOperationException("not supported");
}
@Override
public int compare(int slotA, int slotB) {
throw new UnsupportedOperationException("not supported");
}
@Override
public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
throw new UnsupportedOperationException("not supported");
}
@Override
public int getCount(int slot) {
throw new UnsupportedOperationException("not supported");
}
};
static class CountSlotArrAcc extends CountSlotAcc {
int[] result;
public CountSlotArrAcc(FacetContext fcontext, int numSlots) {
super(fcontext);
result = new int[numSlots];
}
@Override
public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) {
// TODO: count arrays can use fewer bytes based on the number of docs in
// the base set (that's the upper bound for single valued) - look at ttf?
result[slotNum]++;
}
@Override
public int compare(int slotA, int slotB) {
return Integer.compare(result[slotA], result[slotB]);
}
@Override
public Object getValue(int slotNum) throws IOException {
return result[slotNum];
}
public void incrementCount(int slot, int count) {
result[slot] += count;
}
public int getCount(int slot) {
return result[slot];
}
// internal and expert
int[] getCountArray() {
return result;
}
@Override
public void reset() {
Arrays.fill(result, 0);
}
@Override
public void resize(Resizer resizer) {
result = resizer.resize(result, 0);
}
}
static class SortSlotAcc extends SlotAcc {
public SortSlotAcc(FacetContext fcontext) {
super(fcontext);
}
@Override
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
// no-op
}
public int compare(int slotA, int slotB) {
return slotA - slotB;
}
@Override
public Object getValue(int slotNum) {
return slotNum;
}
@Override
public void reset() {
// no-op
}
@Override
public void resize(Resizer resizer) {
// sort slot only works with direct-mapped accumulators
throw new UnsupportedOperationException();
}
}
}