blob: 0578b57d9f89c32f83e84c79dccd3bc966ac30bd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.range;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
/** Base class for range faceting.
*
* @lucene.experimental */
abstract class RangeFacetCounts extends Facets {

  /** Ranges passed to constructor. */
  protected final Range[] ranges;

  /** Per-range counts, parallel to {@link #ranges}; populated by the subclass via {@link #count}. */
  protected final int[] counts;

  /** Optional: if specified, we first test this Query to
   *  see whether the document should be checked for
   *  matching ranges. If this is null, all documents are
   *  checked. */
  protected final Query fastMatchQuery;

  /** Our field name. */
  protected final String field;

  /** Total number of hits that matched at least one range. */
  protected int totCount;

  /** Create {@code RangeFacetCounts} */
  protected RangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) throws IOException {
    this.field = field;
    this.ranges = ranges;
    this.fastMatchQuery = fastMatchQuery;
    counts = new int[ranges.length];
  }

  /**
   * Create a {@link org.apache.lucene.search.DocIdSetIterator} from the provided {@code hits} that
   * relies on {@code fastMatchQuery} if available for first-pass filtering. A null response
   * indicates no documents will match.
   */
  protected DocIdSetIterator createIterator(FacetsCollector.MatchingDocs hits) throws IOException {
    if (fastMatchQuery == null) {
      // No pre-filter configured: every collected hit is a candidate.
      return hits.bits.iterator();
    }
    final IndexReaderContext topContext = ReaderUtil.getTopLevelContext(hits.context);
    final IndexSearcher searcher = new IndexSearcher(topContext);
    // This searcher is throwaway; don't let it populate any shared query cache.
    searcher.setQueryCache(null);
    final Weight fastMatchWeight =
        searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
    final Scorer scorer = fastMatchWeight.scorer(hits.context);
    if (scorer == null) {
      return null; // no hits from the fastMatchQuery; return null
    }
    // Only visit docs matched by BOTH the collected hits and the fast-match query.
    return ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), scorer.iterator()));
  }

  protected abstract LongRange[] getLongRanges();

  /** Allow sub-classes to (optionally) map from the stored long bits to a long that should be
   * used for the actual counting. Default behavior is a no-op.
   */
  protected long mapDocValue(long l) {
    return l;
  }

  /** Counts from the provided field. */
  protected void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
      throws IOException {

    final int segCount = matchingDocs.size();

    // Load doc values for every segment up front, tracking whether any segment is truly
    // multi-valued. If none is, we can run the cheaper single-valued counting path.
    SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[segCount];
    NumericDocValues[] singleValuedDocVals = null;
    boolean foundMultiValued = false;

    for (int seg = 0; seg < segCount; seg++) {
      FacetsCollector.MatchingDocs hits = matchingDocs.get(seg);
      SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
      multiValuedDocVals[seg] = multiValues;

      // Only attempt the singleton unwrap while every segment so far has been single-valued.
      if (foundMultiValued == false) {
        NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
        if (singleValues == null) {
          foundMultiValued = true;
        } else {
          if (singleValuedDocVals == null) {
            singleValuedDocVals = new NumericDocValues[segCount];
          }
          singleValuedDocVals[seg] = singleValues;
        }
      }
    }

    // Drop whichever representation we won't use.
    if (foundMultiValued) {
      singleValuedDocVals = null;
    } else {
      multiValuedDocVals = null;
    }

    LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts);
    int missingCount = 0;

    if (foundMultiValued == false) {
      // Optimized path: every segment is single-valued.
      for (int seg = 0; seg < segCount; seg++) {
        FacetsCollector.MatchingDocs hits = matchingDocs.get(seg);
        final DocIdSetIterator it = createIterator(hits);
        if (it == null) {
          continue; // fastMatchQuery matched nothing in this segment
        }
        assert singleValuedDocVals != null;
        NumericDocValues singleValues = singleValuedDocVals[seg];

        // Count all hits up front; docs with no value are subtracted below via missingCount.
        totCount += hits.totalHits;

        int doc = it.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          if (singleValues.advanceExact(doc)) {
            counter.addSingleValued(mapDocValue(singleValues.longValue()));
          } else {
            missingCount++;
          }
          doc = it.nextDoc();
        }
      }
    } else {
      // General path: at least one segment is multi-valued.
      for (int seg = 0; seg < segCount; seg++) {
        final DocIdSetIterator it = createIterator(matchingDocs.get(seg));
        if (it == null) {
          continue;
        }
        SortedNumericDocValues multiValues = multiValuedDocVals[seg];

        int doc = it.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          if (multiValues.advanceExact(doc)) {
            int valueCount = multiValues.docValueCount();
            if (valueCount == 1) {
              // Single value on this doc: skip the multi-valued bookkeeping.
              counter.addSingleValued(mapDocValue(multiValues.nextValue()));
              totCount++;
            } else {
              counter.startMultiValuedDoc();
              for (int v = 0; v < valueCount; v++) {
                counter.addMultiValued(mapDocValue(multiValues.nextValue()));
              }
              // Only count the doc toward the total if at least one value fell in a range.
              if (counter.endMultiValuedDoc()) {
                totCount++;
              }
            }
          }
          doc = it.nextDoc();
        }
      }
    }

    // finish() reports single-valued docs whose value landed in no range; they are "missing"
    // from the perspective of the total.
    missingCount += counter.finish();
    totCount -= missingCount;
  }

  @Override
  public FacetResult getTopChildren(int topN, String dim, String... path) {
    if (dim.equals(field) == false) {
      throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
    }
    if (path.length != 0) {
      throw new IllegalArgumentException("path.length should be 0");
    }
    // All ranges are always reported, regardless of topN.
    LabelAndValue[] labelValues = new LabelAndValue[counts.length];
    for (int i = 0; i < counts.length; i++) {
      labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
    }
    return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
  }

  @Override
  public Number getSpecificValue(String dim, String... path) throws IOException {
    // TODO: should we impl this?
    throw new UnsupportedOperationException();
  }

  @Override
  public List<FacetResult> getAllDims(int topN) throws IOException {
    // Range faceting exposes a single dimension: the configured field.
    return Collections.singletonList(getTopChildren(topN, field));
  }

  @Override
  public String toString() {
    StringBuilder b = new StringBuilder("RangeFacetCounts totCount=").append(totCount).append(":\n");
    for (int i = 0; i < ranges.length; i++) {
      b.append(" ").append(ranges[i].label).append(" -> count=").append(counts[i]).append('\n');
    }
    return b.toString();
  }
}