Initialize facet counting data structures lazily (#12408) (#13300)
This change covers:
* Taxonomy faceting:
  * FastTaxonomyFacetCounts
  * TaxonomyFacetIntAssociations
  * TaxonomyFacetFloatAssociations
* SSDV (SortedSetDocValues) faceting:
  * SortedSetDocValuesFacetCounts
  * ConcurrentSortedSetDocValuesFacetCounts
  * StringValueFacetCounts
* Range faceting:
  * LongRangeFacetCounts
  * DoubleRangeFacetCounts
* Long faceting:
  * LongValueFacetCounts
Left for a future iteration:
* RangeOnRange faceting
* FacetSet faceting
Co-authored-by: Greg Miller <gsmiller@gmail.com>
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4bb287a..d30f50f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -100,6 +100,9 @@
* GITHUB#13321: Improve compressed int4 quantized vector search by utilizing SIMD inline with the decompression
process. (Ben Trent)
+* GITHUB#12408: Lazy initialization improvements for Facets implementations when there are segments with no hits
+ to count. (Greg Miller)
+
Bug Fixes
---------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java
index edbe2b6..a8d9411 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java
@@ -51,10 +51,13 @@
public class LongValueFacetCounts extends Facets {
/** Used for all values that are < 1K. */
- private final int[] counts = new int[1024];
+ private int[] counts;
/** Used for all values that are >= 1K. */
- private final LongIntHashMap hashCounts = new LongIntHashMap();
+ private LongIntHashMap hashCounts;
+
+ /** Whether-or-not counters have been initialized. */
+ private boolean initialized;
/** Field being counted. */
private final String field;
@@ -125,6 +128,7 @@
public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader)
throws IOException {
this.field = field;
+ initializeCounters();
if (valueSource != null) {
countAll(reader, valueSource);
} else {
@@ -141,6 +145,7 @@
public LongValueFacetCounts(String field, MultiLongValuesSource valuesSource, IndexReader reader)
throws IOException {
this.field = field;
+ initializeCounters();
if (valuesSource != null) {
LongValuesSource singleValued = MultiLongValuesSource.unwrapSingleton(valuesSource);
if (singleValued != null) {
@@ -153,11 +158,25 @@
}
}
+ private void initializeCounters() {
+ if (initialized) {
+ return;
+ }
+ assert counts == null && hashCounts == null;
+ initialized = true;
+ counts = new int[1024];
+ hashCounts = new LongIntHashMap();
+ }
+
/** Counts from the provided valueSource. */
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeCounters();
LongValues fv = valueSource.getValues(hits.context, null);
@@ -183,6 +202,10 @@
private void count(MultiLongValuesSource valuesSource, List<MatchingDocs> matchingDocs)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeCounters();
MultiLongValues multiValues = valuesSource.getValues(hits.context);
@@ -213,6 +236,10 @@
/** Counts from the field's indexed doc values. */
private void count(String field, List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeCounters();
SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@@ -350,6 +377,13 @@
@Override
public FacetResult getAllChildren(String dim, String... path) throws IOException {
validateDimAndPathForGetChildren(dim, path);
+
+ if (initialized == false) {
+ // nothing was counted (either no hits or no values for all hits):
+ assert totCount == 0;
+ return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
+ }
+
List<LabelAndValue> labelValues = new ArrayList<>();
for (int i = 0; i < counts.length; i++) {
if (counts[i] != 0) {
@@ -394,6 +428,12 @@
*/
@Deprecated
public FacetResult getTopChildrenSortByCount(int topN) {
+ if (initialized == false) {
+ // nothing was counted (either no hits or no values for all hits):
+ assert totCount == 0;
+ return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
+ }
+
PriorityQueue<Entry> pq =
new PriorityQueue<>(Math.min(topN, counts.length + hashCounts.size())) {
@Override
@@ -450,6 +490,12 @@
* efficient to use {@link #getAllChildren(String, String...)}.
*/
public FacetResult getAllChildrenSortByValue() {
+ if (initialized == false) {
+ // nothing was counted (either no hits or no values for all hits):
+ assert totCount == 0;
+ return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
+ }
+
List<LabelAndValue> labelValues = new ArrayList<>();
// compact & sort hash table's arrays by value
@@ -543,27 +589,29 @@
StringBuilder b = new StringBuilder();
b.append("LongValueFacetCounts totCount=");
b.append(totCount);
- b.append(":\n");
- for (int i = 0; i < counts.length; i++) {
- if (counts[i] != 0) {
- b.append(" ");
- b.append(i);
- b.append(" -> count=");
- b.append(counts[i]);
- b.append('\n');
- }
- }
-
- if (hashCounts.size() != 0) {
- for (LongIntCursor c : hashCounts) {
- if (c.value != 0) {
+ if (initialized) {
+ b.append(":\n");
+ for (int i = 0; i < counts.length; i++) {
+ if (counts[i] != 0) {
b.append(" ");
- b.append(c.key);
+ b.append(i);
b.append(" -> count=");
- b.append(c.value);
+ b.append(counts[i]);
b.append('\n');
}
}
+
+ if (hashCounts.size() != 0) {
+ for (LongIntCursor c : hashCounts) {
+ if (c.value != 0) {
+ b.append(" ");
+ b.append(c.key);
+ b.append(" -> count=");
+ b.append(c.value);
+ b.append('\n');
+ }
+ }
+ }
}
return b.toString();
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index 957efec..335f93d 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -69,8 +69,9 @@
private final OrdinalMap ordinalMap;
private final SortedSetDocValues docValues;
- private final int[] denseCounts;
+ private int[] denseCounts;
private final IntIntHashMap sparseCounts;
+ private boolean initialized;
private final int cardinality;
private int totalDocCount;
@@ -101,7 +102,9 @@
if (facetsCollector != null) {
if (cardinality < 1024) { // count densely for low cardinality
sparseCounts = null;
- denseCounts = new int[cardinality];
+ denseCounts = null;
+ initialized = false;
+ count(facetsCollector);
} else {
int totalHits = 0;
int totalDocs = 0;
@@ -110,22 +113,31 @@
totalDocs += matchingDocs.context.reader().maxDoc();
}
- // If our result set is < 10% of the index, we collect sparsely (use hash map). This
- // heuristic is borrowed from IntTaxonomyFacetCounts:
- if (totalHits < totalDocs / 10) {
- sparseCounts = new IntIntHashMap();
- denseCounts = null;
- } else {
+ // No counting needed if there are no hits:
+ if (totalHits == 0) {
sparseCounts = null;
- denseCounts = new int[cardinality];
+ denseCounts = null;
+ initialized = true;
+ } else {
+ // If our result set is < 10% of the index, we collect sparsely (use hash map). This
+ // heuristic is borrowed from IntTaxonomyFacetCounts:
+ if (totalHits < totalDocs / 10) {
+ sparseCounts = new IntIntHashMap();
+ denseCounts = null;
+ initialized = true;
+ } else {
+ sparseCounts = null;
+ denseCounts = new int[cardinality];
+ initialized = true;
+ }
+ count(facetsCollector);
}
}
-
- count(facetsCollector);
} else {
// Since we're counting all ordinals, count densely:
sparseCounts = null;
denseCounts = new int[cardinality];
+ initialized = true;
countAll();
}
@@ -294,6 +306,9 @@
if (matchingDocs.size() == 1) {
FacetsCollector.MatchingDocs hits = matchingDocs.get(0);
+ if (hits.totalHits == 0) {
+ return;
+ }
// Validate state before doing anything else:
validateState(hits.context);
@@ -314,6 +329,10 @@
assert ordinalMap != null;
assert docValues instanceof MultiDocValues.MultiSortedSetDocValues;
+ if (hits.totalHits == 0) {
+ continue;
+ }
+
MultiDocValues.MultiSortedSetDocValues multiValues =
(MultiDocValues.MultiSortedSetDocValues) docValues;
@@ -368,6 +387,13 @@
FacetsCollector.MatchingDocs hits,
Bits liveDocs)
throws IOException {
+ if (initialized == false) {
+ assert denseCounts == null && sparseCounts == null;
+ // If the counters weren't initialized, we can assume the cardinality is low enough that
+ // dense counting will be preferable:
+ denseCounts = new int[cardinality];
+ initialized = true;
+ }
// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java
index ae5ac42..701d247 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java
@@ -157,20 +157,25 @@
private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
- LongRange[] longRanges = getLongRanges();
-
- LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
-
+ LongRangeCounter counter = null;
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
- DoubleValues fv = valueSource.getValues(hits.context, null);
- totCount += hits.totalHits;
+ if (hits.totalHits == 0) {
+ continue;
+ }
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
+ if (counter == null) {
+ counter = setupCounter();
+ }
+
+ DoubleValues fv = valueSource.getValues(hits.context, null);
+ totCount += hits.totalHits;
+
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (fv.advanceExact(doc)) {
@@ -183,27 +188,34 @@
}
}
- missingCount += counter.finish();
- totCount -= missingCount;
+ if (counter != null) {
+ missingCount += counter.finish();
+ totCount -= missingCount;
+ }
}
/** Counts from the provided valueSource. */
private void count(MultiDoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
- LongRange[] longRanges = getLongRanges();
-
- LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
-
+ LongRangeCounter counter = null;
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
- MultiDoubleValues multiValues = valueSource.getValues(hits.context);
+ if (hits.totalHits == 0) {
+ continue;
+ }
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
+ if (counter == null) {
+ counter = setupCounter();
+ }
+
+ MultiDoubleValues multiValues = valueSource.getValues(hits.context);
+
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (multiValues.advanceExact(doc)) {
@@ -232,8 +244,10 @@
}
}
- missingCount += counter.finish();
- totCount -= missingCount;
+ if (counter != null) {
+ missingCount += counter.finish();
+ totCount -= missingCount;
+ }
}
/** Create long ranges from the double ranges. */
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java
index 42a2e4c..34aa3fc 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java
@@ -128,21 +128,27 @@
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
- LongRange[] ranges = getLongRanges();
-
- LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
+ LongRangeCounter counter = null;
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
- LongValues fv = valueSource.getValues(hits.context, null);
- totCount += hits.totalHits;
+ if (hits.totalHits == 0) {
+ continue;
+ }
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
+ if (counter == null) {
+ counter = setupCounter();
+ }
+
+ LongValues fv = valueSource.getValues(hits.context, null);
+ totCount += hits.totalHits;
+
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (fv.advanceExact(doc)) {
@@ -155,26 +161,34 @@
}
}
- missingCount += counter.finish();
- totCount -= missingCount;
+ if (counter != null) {
+ missingCount += counter.finish();
+ totCount -= missingCount;
+ }
}
/** Counts from the provided valueSource. */
private void count(MultiLongValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
- LongRange[] ranges = getLongRanges();
-
- LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
+ LongRangeCounter counter = null;
for (MatchingDocs hits : matchingDocs) {
- MultiLongValues multiValues = valueSource.getValues(hits.context);
+ if (hits.totalHits == 0) {
+ continue;
+ }
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
+ if (counter == null) {
+ counter = setupCounter();
+ }
+
+ MultiLongValues multiValues = valueSource.getValues(hits.context);
+
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (multiValues.advanceExact(doc)) {
@@ -203,8 +217,10 @@
}
}
- int missingCount = counter.finish();
- totCount -= missingCount;
+ if (counter != null) {
+ int missingCount = counter.finish();
+ totCount -= missingCount;
+ }
}
@Override
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java
index df22794..2263ef4 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java
@@ -38,8 +38,8 @@
/** Ranges passed to constructor. */
protected final Range[] ranges;
- /** Counts, initialized in by subclass. */
- protected final int[] counts;
+ /** Counts. */
+ protected int[] counts;
/** Our field name. */
protected final String field;
@@ -52,7 +52,6 @@
super(fastMatchQuery);
this.field = field;
this.ranges = ranges;
- counts = new int[ranges.length];
}
protected abstract LongRange[] getLongRanges();
@@ -61,6 +60,12 @@
return l;
}
+ protected LongRangeCounter setupCounter() {
+ assert counts == null;
+ counts = new int[ranges.length];
+ return LongRangeCounter.create(getLongRanges(), counts);
+ }
+
/** Counts from the provided field. */
protected void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
throws IOException {
@@ -68,15 +73,20 @@
// load doc values for all segments up front and keep track of whether-or-not we found any that
// were actually multi-valued. this allows us to optimize the case where all segments contain
// single-values.
- SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
+ SortedNumericDocValues[] multiValuedDocVals = null;
NumericDocValues[] singleValuedDocVals = null;
boolean foundMultiValued = false;
for (int i = 0; i < matchingDocs.size(); i++) {
-
FacetsCollector.MatchingDocs hits = matchingDocs.get(i);
+ if (hits.totalHits == 0) {
+ continue;
+ }
SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
+ if (multiValuedDocVals == null) {
+ multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
+ }
multiValuedDocVals[i] = multiValues;
// only bother trying to unwrap a singleton if we haven't yet seen any true multi-valued cases
@@ -93,6 +103,11 @@
}
}
+ if (multiValuedDocVals == null) {
+ // no hits or no doc values in all segments. nothing to count:
+ return;
+ }
+
// we only need to keep around one or the other at this point
if (foundMultiValued) {
singleValuedDocVals = null;
@@ -100,7 +115,7 @@
multiValuedDocVals = null;
}
- LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts);
+ LongRangeCounter counter = setupCounter();
int missingCount = 0;
@@ -182,9 +197,15 @@
@Override
public FacetResult getAllChildren(String dim, String... path) throws IOException {
validateDimAndPathForGetChildren(dim, path);
- LabelAndValue[] labelValues = new LabelAndValue[counts.length];
- for (int i = 0; i < counts.length; i++) {
- labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
+ LabelAndValue[] labelValues = new LabelAndValue[ranges.length];
+ if (counts == null) {
+ for (int i = 0; i < ranges.length; i++) {
+ labelValues[i] = new LabelAndValue(ranges[i].label, 0);
+ }
+ } else {
+ for (int i = 0; i < ranges.length; i++) {
+ labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
+ }
}
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
}
@@ -227,7 +248,7 @@
b.append(" ");
b.append(ranges[i].label);
b.append(" -> count=");
- b.append(counts[i]);
+ b.append(counts != null ? counts[i] : 0);
b.append('\n');
}
return b.toString();
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
index 4358450..ac42cf5 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@@ -69,6 +70,9 @@
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
validateTopN(topN);
+ if (hasCounts() == false) {
+ return null;
+ }
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim, path);
return createFacetResult(topChildrenForPath, dim, path);
}
@@ -81,6 +85,10 @@
return null;
}
+ if (hasCounts() == false) {
+ return null;
+ }
+
// Compute the actual results:
int pathCount = 0;
List<LabelAndValue> labelValues = new ArrayList<>();
@@ -112,12 +120,17 @@
return -1;
}
- return getCount(ord);
+ return hasCounts() == false ? 0 : getCount(ord);
}
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
validateTopN(topN);
+
+ if (hasCounts() == false) {
+ return Collections.emptyList();
+ }
+
List<FacetResult> results = new ArrayList<>();
for (String dim : state.getDims()) {
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim);
@@ -137,6 +150,10 @@
validateTopN(topNDims);
validateTopN(topNChildren);
+ if (hasCounts() == false) {
+ return Collections.emptyList();
+ }
+
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
// string values.
PriorityQueue<DimValue> pq =
@@ -231,6 +248,9 @@
return Arrays.asList(results);
}
+ /** Were any counts actually computed? (They may not be if there are no hits, etc.) */
+ abstract boolean hasCounts();
+
/** Retrieve the count for a specified ordinal. */
abstract int getCount(int ord);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
index 221836c..0b03da9 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
@@ -78,6 +78,13 @@
}
@Override
+ boolean hasCounts() {
+ // TODO: safe to always assume there are counts, but maybe it would be more optimal to
+ // actually track if we see a count?
+ return true;
+ }
+
+ @Override
int getCount(int ord) {
return counts.get(ord);
}
@@ -99,6 +106,11 @@
@Override
public Void call() throws IOException {
+ // If we're counting collected hits but there were none, short-circuit:
+ if (hits != null && hits.totalHits == 0) {
+ return null;
+ }
+
SortedSetDocValues multiValues = DocValues.getSortedSet(leafReader, field);
if (multiValues == null) {
// nothing to count here
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
index 7ec8fe4..0d0cf46 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
@@ -56,7 +56,8 @@
* @lucene.experimental
*/
public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
- final int[] counts;
+ private final SortedSetDocValuesReaderState state;
+ int[] counts;
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
@@ -67,7 +68,7 @@
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits)
throws IOException {
super(state);
- this.counts = new int[state.getSize()];
+ this.state = state;
if (hits == null) {
// browse only
countAll();
@@ -76,6 +77,17 @@
}
}
+ private void initializeCounts() {
+ if (counts == null) {
+ counts = new int[state.getSize()];
+ }
+ }
+
+ @Override
+ boolean hasCounts() {
+ return counts != null;
+ }
+
@Override
int getCount(int ord) {
return counts[ord];
@@ -90,6 +102,9 @@
return;
}
+ // Initialize counts:
+ initializeCounts();
+
// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@@ -159,12 +174,19 @@
private void countOneSegment(
OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs)
throws IOException {
+ if (hits != null && hits.totalHits == 0) {
+ return;
+ }
+
SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field);
if (multiValues == null) {
// nothing to count
return;
}
+ // Initialize counts:
+ initializeCounts();
+
// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
index b1daa75..b6098f7 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
@@ -72,11 +72,15 @@
private void count(List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
SortedNumericDocValues multiValued =
FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
if (multiValued == null) {
continue;
}
+ initializeValueCounters();
NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
@@ -115,13 +119,14 @@
}
private void countAll(IndexReader reader) throws IOException {
- assert values != null;
for (LeafReaderContext context : reader.leaves()) {
SortedNumericDocValues multiValued =
FacetUtils.loadOrdinalValues(context.reader(), indexFieldName);
if (multiValued == null) {
continue;
}
+ initializeValueCounters();
+ assert values != null;
Bits liveDocs = context.reader().getLiveDocs();
NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
index 7e510da..c456e77 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
@@ -20,10 +20,12 @@
import com.carrotsearch.hppc.IntArrayList;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.FacetsConfig.DimConfig;
import org.apache.lucene.facet.LabelAndValue;
@@ -48,7 +50,7 @@
protected final AssociationAggregationFunction aggregationFunction;
/** Per-ordinal value. */
- protected final float[] values;
+ protected float[] values;
/**
* Constructor that defaults the aggregation function to {@link
@@ -66,15 +68,30 @@
String indexFieldName,
TaxonomyReader taxoReader,
AssociationAggregationFunction aggregationFunction,
- FacetsConfig config)
+ FacetsConfig config,
+ FacetsCollector fc)
throws IOException {
- super(indexFieldName, taxoReader, config);
+ super(indexFieldName, taxoReader, config, fc);
this.aggregationFunction = aggregationFunction;
- values = new float[taxoReader.getSize()];
+ }
+
+ @Override
+ boolean hasValues() {
+ return values != null;
+ }
+
+ void initializeValueCounters() {
+ if (values == null) {
+ values = new float[taxoReader.getSize()];
+ }
}
/** Rolls up any single-valued hierarchical dimensions. */
protected void rollup() throws IOException {
+ if (values == null) {
+ return;
+ }
+
// Rollup any necessary dims:
ParallelTaxonomyArrays.IntArray children = getChildren();
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
@@ -120,7 +137,7 @@
if (ord < 0) {
return -1;
}
- return values[ord];
+ return values == null ? 0 : values[ord];
}
@Override
@@ -190,6 +207,10 @@
return null;
}
+ if (values == null) {
+ return null;
+ }
+
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
return createFacetResult(topChildrenForPath, dim, path);
}
@@ -288,6 +309,10 @@
validateTopN(topNDims);
validateTopN(topNChildren);
+ if (values == null) {
+ return Collections.emptyList();
+ }
+
// get existing children and siblings ordinal array from TaxonomyFacets
ParallelTaxonomyArrays.IntArray children = getChildren();
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
index 25dbf34..d46ad78 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
@@ -21,6 +21,7 @@
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -55,14 +56,17 @@
* checked which is being used before a loop instead of calling {@link #increment} for each
* iteration.
*/
- protected final int[] values;
+ protected int[] values;
/**
* Sparse ordinal values.
*
* @see #values for why protected.
*/
- protected final IntIntHashMap sparseValues;
+ protected IntIntHashMap sparseValues;
+
+ /** Whether value counters have been initialized. */
+ boolean initialized;
/**
* Constructor that defaults the aggregation function to {@link
@@ -91,14 +95,24 @@
AssociationAggregationFunction aggregationFunction,
FacetsCollector fc)
throws IOException {
- super(indexFieldName, taxoReader, config);
+ super(indexFieldName, taxoReader, config, fc);
this.aggregationFunction = aggregationFunction;
+ }
+ @Override
+ boolean hasValues() {
+ return initialized;
+ }
+
+ void initializeValueCounters() {
+ if (initialized) {
+ return;
+ }
+ initialized = true;
+ assert sparseValues == null && values == null;
if (useHashTable(fc, taxoReader)) {
sparseValues = new IntIntHashMap();
- values = null;
} else {
- sparseValues = null;
values = new int[taxoReader.getSize()];
}
}
@@ -160,6 +174,10 @@
/** Rolls up any single-valued hierarchical dimensions. */
protected void rollup() throws IOException {
+ if (initialized == false) {
+ return;
+ }
+
// Rollup any necessary dims:
ParallelTaxonomyArrays.IntArray children = null;
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
@@ -214,7 +232,7 @@
if (ord < 0) {
return -1;
}
- return getValue(ord);
+ return initialized ? getValue(ord) : 0;
}
@Override
@@ -226,6 +244,10 @@
return null;
}
+ if (initialized == false) {
+ return null;
+ }
+
int aggregatedValue = 0;
IntArrayList ordinals = new IntArrayList();
@@ -292,6 +314,10 @@
return null;
}
+ if (initialized == false) {
+ return null;
+ }
+
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
return createFacetResult(topChildrenForPath, dim, path);
}
@@ -377,6 +403,10 @@
throw new IllegalArgumentException("topN must be > 0");
}
+ if (initialized == false) {
+ return Collections.emptyList();
+ }
+
// get children and siblings ordinal array from TaxonomyFacets
ParallelTaxonomyArrays.IntArray children = getChildren();
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java
index 8dd55e9..60fcaa6 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java
@@ -60,6 +60,11 @@
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
IntsRef scratch = new IntsRef();
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeValueCounters();
+
OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
DocIdSetIterator docs = hits.bits.iterator();
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java
index b85ffc9..2e3db92 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java
@@ -93,7 +93,7 @@
FacetsCollector fc,
AssociationAggregationFunction aggregationFunction)
throws IOException {
- super(indexFieldName, taxoReader, aggregationFunction, config);
+ super(indexFieldName, taxoReader, aggregationFunction, config, fc);
ordinalsReader = null;
aggregateValues(aggregationFunction, fc.getMatchingDocs());
}
@@ -110,7 +110,7 @@
AssociationAggregationFunction aggregationFunction,
DoubleValuesSource valuesSource)
throws IOException {
- super(indexFieldName, taxoReader, aggregationFunction, config);
+ super(indexFieldName, taxoReader, aggregationFunction, config, fc);
ordinalsReader = null;
aggregateValues(aggregationFunction, fc.getMatchingDocs(), fc.getKeepScores(), valuesSource);
}
@@ -131,7 +131,7 @@
AssociationAggregationFunction aggregationFunction,
DoubleValuesSource valuesSource)
throws IOException {
- super(ordinalsReader.getIndexFieldName(), taxoReader, aggregationFunction, config);
+ super(ordinalsReader.getIndexFieldName(), taxoReader, aggregationFunction, config, fc);
this.ordinalsReader = ordinalsReader;
aggregateValues(aggregationFunction, fc.getMatchingDocs(), fc.getKeepScores(), valuesSource);
}
@@ -166,6 +166,11 @@
// If the user provided a custom ordinals reader, use it to retrieve the document ordinals:
IntsRef scratch = new IntsRef();
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeValueCounters();
+
OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
DoubleValues scores = keepScores ? scores(hits) : null;
DoubleValues functionValues = valueSource.getValues(hits.context, scores);
@@ -186,6 +191,11 @@
}
} else {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeValueCounters();
+
SortedNumericDocValues ordinalValues =
FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
if (ordinalValues == null) {
@@ -221,6 +231,11 @@
throws IOException {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeValueCounters();
+
BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
DocIdSetIterator it =
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java
index 09638bc..f437efa 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java
@@ -63,6 +63,11 @@
AssociationAggregationFunction aggregationFunction, List<MatchingDocs> matchingDocs)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
+ if (hits.totalHits == 0) {
+ continue;
+ }
+ initializeValueCounters();
+
BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
DocIdSetIterator it = ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), dv));
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
index cb23e8f..5299264 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java
@@ -19,11 +19,13 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
+import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.FacetsConfig.DimConfig;
@@ -62,6 +64,9 @@
/** {@code FacetsConfig} provided to the constructor. */
protected final FacetsConfig config;
+ /** {@code FacetsCollector} provided to the constructor; may be {@code null}. */
+ final FacetsCollector fc;
+
/** Maps parent ordinal to its child, or -1 if the parent is childless. */
private ParallelTaxonomyArrays.IntArray children;
@@ -71,12 +76,29 @@
/** Maps an ordinal to its parent, or -1 if there is no parent (root node). */
final ParallelTaxonomyArrays.IntArray parents;
- /** Sole constructor. */
+ /**
+ * Constructor without a {@link FacetsCollector} - we don't have access to the hits, so we have to
+ * assume there are hits when initializing internal data structures.
+ *
+ * @deprecated To be removed in Lucene 10.
+ */
+ @Deprecated
protected TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
throws IOException {
+ this(indexFieldName, taxoReader, config, null);
+ }
+
+ /**
+ * Constructor with a {@link FacetsCollector}, allowing lazy initialization of internal data
+ * structures.
+ */
+ TaxonomyFacets(
+ String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
+ throws IOException {
this.indexFieldName = indexFieldName;
this.taxoReader = taxoReader;
this.config = config;
+ this.fc = fc;
parents = taxoReader.getParallelTaxonomyArrays().parents();
}
@@ -147,6 +169,11 @@
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
validateTopN(topN);
+
+ if (hasValues() == false) {
+ return Collections.emptyList();
+ }
+
ParallelTaxonomyArrays.IntArray children = getChildren();
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
@@ -167,4 +194,7 @@
results.sort(BY_VALUE_THEN_DIM);
return results;
}
+
+ /** Returns {@code true} if any values were actually aggregated during counting. */
+ abstract boolean hasValues();
}