LUCENE-9945: Extend DrillSidewaysResult to expose drillDowns and drillSideways (#159)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e07b498..fef6b98 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -245,6 +245,8 @@
* LUCENE-10019: Align file starts in CFS files to have proper alignment (8 bytes)
(Uwe Schinder)
+* LUCENE-9945: Extend DrillSideways to support exposing FacetsCollectors directly. (Greg Miller, Sejal Pawar)
+
Bug fixes
* LUCENE-9686: Fix read past EOF handling in DirectIODirectory. (Zach Chen,
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
index 7fb3c7c..758a31e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
@@ -210,7 +210,8 @@
} else {
searcher.search(query, hitCollector);
}
- return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, null, null), null);
+ return new DrillSidewaysResult(
+ buildFacetsResult(drillDownCollector, null, null), null, drillDownCollector, null, null);
}
Query baseQuery = query.getBaseQuery();
@@ -272,12 +273,14 @@
drillSidewaysFacetsCollectorManagers[dim].reduce(facetsCollectorsForDim);
}
+ String[] drillSidewaysDims = drillDownDims.keySet().toArray(new String[0]);
+
return new DrillSidewaysResult(
- buildFacetsResult(
- drillDownCollector,
- drillSidewaysCollectors,
- drillDownDims.keySet().toArray(new String[0])),
- null);
+ buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillSidewaysDims),
+ null,
+ drillDownCollector,
+ drillSidewaysCollectors,
+ drillSidewaysDims);
}
/** Search, sorting by {@link Sort}, and computing drill down and sideways counts. */
@@ -318,7 +321,12 @@
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, query);
}
- return new DrillSidewaysResult(r.facets, topDocs);
+ return new DrillSidewaysResult(
+ r.facets,
+ topDocs,
+ r.drillDownFacetsCollector,
+ r.drillSidewaysFacetsCollector,
+ r.drillSidewaysDims);
} else {
@@ -329,7 +337,12 @@
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, query);
}
- return new DrillSidewaysResult(r.facets, topDocs);
+ return new DrillSidewaysResult(
+ r.facets,
+ topDocs,
+ r.drillDownFacetsCollector,
+ r.drillSidewaysFacetsCollector,
+ r.drillSidewaysDims);
}
} else {
return search(after, query, topN);
@@ -370,14 +383,24 @@
}
};
ConcurrentDrillSidewaysResult<TopDocs> r = searchConcurrently(query, collectorManager);
- return new DrillSidewaysResult(r.facets, r.collectorResult);
+ return new DrillSidewaysResult(
+ r.facets,
+ r.collectorResult,
+ r.drillDownFacetsCollector,
+ r.drillSidewaysFacetsCollector,
+ r.drillSidewaysDims);
} else {
TopScoreDocCollector hitCollector =
TopScoreDocCollector.create(topN, after, Integer.MAX_VALUE);
DrillSidewaysResult r = search(query, hitCollector);
- return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
+ return new DrillSidewaysResult(
+ r.facets,
+ hitCollector.topDocs(),
+ r.drillDownFacetsCollector,
+ r.drillSidewaysFacetsCollector,
+ r.drillSidewaysDims);
}
}
@@ -390,7 +413,11 @@
return false;
}
- /** Result of a drill sideways search, including the {@link Facets} and {@link TopDocs}. */
+ /**
+ * Result of a drill sideways search, including the {@link Facets} and {@link TopDocs}. The {@link
+ * FacetsCollector}s for the drill down and drill sideways dimensions are also exposed for
+ * advanced use-cases that need access to them as an alternative to accessing the {@code Facets}.
+ */
public static class DrillSidewaysResult {
/** Combined drill down and sideways results. */
public final Facets facets;
@@ -398,10 +425,41 @@
/** Hits. */
public final TopDocs hits;
+ /**
+ * FacetsCollector populated based on hits that match the full DrillDownQuery, treating all
+ * drill down dimensions as required clauses. Useful for advanced use-cases that want to compute
+ * Facets results separate from the provided Facets in this result.
+ */
+ public final FacetsCollector drillDownFacetsCollector;
+
+ /**
+ * FacetsCollectors populated for each drill sideways dimension. Each collector exposes the hits
+ * that match on all DrillDownQuery dimensions, but treating their corresponding sideways
+ * dimension as optional. This array provides a FacetsCollector for each drill down dimension
+ * present in the original DrillDownQuery, and the associated dimension for each FacetsCollector
+ * can be determined using the parallel {@link DrillSidewaysResult#drillSidewaysDims} array.
+ * Useful for advanced use-cases that want to compute Facets results separate from the provided
+ * Facets in this result.
+ */
+ public final FacetsCollector[] drillSidewaysFacetsCollector;
+
+ /**
+ * Dimensions that correspond to the {@link DrillSidewaysResult#drillSidewaysFacetsCollector}
+ */
+ public final String[] drillSidewaysDims;
+
/** Sole constructor. */
- public DrillSidewaysResult(Facets facets, TopDocs hits) {
+ public DrillSidewaysResult(
+ Facets facets,
+ TopDocs hits,
+ FacetsCollector drillDownFacetsCollector,
+ FacetsCollector[] drillSidewaysFacetsCollector,
+ String[] drillSidewaysDims) {
this.facets = facets;
this.hits = hits;
+ this.drillDownFacetsCollector = drillDownFacetsCollector;
+ this.drillSidewaysFacetsCollector = drillSidewaysFacetsCollector;
+ this.drillSidewaysDims = drillSidewaysDims;
}
}
@@ -487,7 +545,12 @@
}
return new ConcurrentDrillSidewaysResult<>(
- buildFacetsResult(mainFacetsCollector, null, null), null, collectorResult);
+ buildFacetsResult(mainFacetsCollector, null, null),
+ null,
+ collectorResult,
+ mainFacetsCollector,
+ null,
+ null);
}
Query baseQuery = query.getBaseQuery();
@@ -539,13 +602,15 @@
drillSidewaysFacetsCollectorManagers[dim].reduce(facetsCollectorsForDim);
}
+ String[] drillSidewaysDims = drillDownDims.keySet().toArray(new String[0]);
+
return new ConcurrentDrillSidewaysResult<>(
- buildFacetsResult(
- drillDownCollector,
- drillSidewaysCollectors,
- drillDownDims.keySet().toArray(new String[0])),
+ buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillSidewaysDims),
null,
- collectorResult);
+ collectorResult,
+ drillDownCollector,
+ drillSidewaysCollectors,
+ drillSidewaysDims);
}
@SuppressWarnings("unchecked")
@@ -607,12 +672,16 @@
throw new RuntimeException(e);
}
+ String[] drillSidewaysDims = drillDownDims.keySet().toArray(new String[0]);
+
// build the facets and return the result
return new ConcurrentDrillSidewaysResult<>(
- buildFacetsResult(
- mainFacetsCollector, facetsCollectors, drillDownDims.keySet().toArray(new String[0])),
+ buildFacetsResult(mainFacetsCollector, facetsCollectors, drillSidewaysDims),
null,
- collectorResult);
+ collectorResult,
+ mainFacetsCollector,
+ facetsCollectors,
+ drillSidewaysDims);
}
/**
@@ -624,8 +693,15 @@
public final R collectorResult;
/** Sole constructor. */
- ConcurrentDrillSidewaysResult(Facets facets, TopDocs hits, R collectorResult) {
- super(facets, hits);
+ ConcurrentDrillSidewaysResult(
+ Facets facets,
+ TopDocs hits,
+ R collectorResult,
+ FacetsCollector drillDownFacetsCollector,
+ FacetsCollector[] drillSidewaysFacetsCollector,
+ String[] drillSidewaysDims) {
+ super(
+ facets, hits, drillDownFacetsCollector, drillSidewaysFacetsCollector, drillSidewaysDims);
this.collectorResult = collectorResult;
}
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index b9e65f1..61f0ac9 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -1798,4 +1798,80 @@
writer.close();
IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
}
+
+ public void testExtendedDrillSidewaysResult() throws Exception {
+ // LUCENE-9945: Facets rebuilt from the FacetsCollectors exposed on the result must match r.facets
+ Directory dir = newDirectory();
+ Directory taxoDir = newDirectory();
+
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+ DirectoryTaxonomyWriter taxoWriter =
+ new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+ FacetsConfig config = new FacetsConfig();
+ config.setHierarchical("dim", true);
+
+ Document doc = new Document();
+ doc.add(new FacetField("dim", "a"));
+ writer.addDocument(config.build(taxoWriter, doc));
+ // The second document must carry dim/x itself so the drill-down on "x" below has a hit
+ Document doc2 = new Document();
+ doc2.add(new FacetField("dim", "x"));
+ writer.addDocument(config.build(taxoWriter, doc2));
+
+ // open NRT
+ IndexSearcher searcher = getNewSearcher(writer.getReader());
+ TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+
+ DrillDownQuery ddq = new DrillDownQuery(config);
+ ddq.add("dim", "x");
+
+ DrillSideways ds = getNewDrillSidewaysBuildFacetsResult(searcher, config, taxoReader);
+
+ SimpleCollectorManager manager =
+ new SimpleCollectorManager(
+ 10, (a, b) -> Float.compare(b.docAndScore.score, a.docAndScore.score));
+ SimpleCollector collector = manager.newCollector();
+
+ // Sometimes pass in a Collector and sometimes CollectorManager
+ // so that we can test both DrillSidewaysResult and ConcurrentDrillSidewaysResult
+ DrillSidewaysResult r;
+ if (random().nextBoolean()) {
+ r = ds.search(ddq, collector);
+ } else {
+ r = ds.search(ddq, manager);
+ }
+
+ // compute Facets using exposed FacetsCollectors from DrillSidewaysResult
+ Map<String, Facets> drillSidewaysFacets = new HashMap<>();
+ Facets drillDownFacets = getTaxonomyFacetCounts(taxoReader, config, r.drillDownFacetsCollector);
+ if (r.drillSidewaysFacetsCollector != null) {
+ for (int i = 0; i < r.drillSidewaysFacetsCollector.length; i++) {
+ drillSidewaysFacets.put(
+ r.drillSidewaysDims[i],
+ getTaxonomyFacetCounts(taxoReader, config, r.drillSidewaysFacetsCollector[i]));
+ }
+ }
+
+ Facets facets;
+ if (drillSidewaysFacets.isEmpty()) {
+ facets = drillDownFacets;
+ } else {
+ facets = new MultiFacets(drillSidewaysFacets, drillDownFacets);
+ }
+
+ // Facets computed using the FacetsCollectors exposed in DrillSidewaysResult
+ // should match the Facets computed by {@link DrillSideways#buildFacetsResult}
+ FacetResult facetResultActual = facets.getTopChildren(2, "dim");
+ FacetResult facetResultExpected = r.facets.getTopChildren(2, "dim");
+
+ assertEquals(facetResultExpected.dim, facetResultActual.dim);
+ assertEquals(facetResultExpected.path.length, facetResultActual.path.length);
+ assertEquals(facetResultExpected.value, facetResultActual.value);
+ assertEquals(facetResultExpected.childCount, facetResultActual.childCount);
+
+ writer.close();
+ IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
+ }
}