blob: 15df7e3d957b01aeb7c012713312f2268f574a7b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.ThreadInterruptedException;
/**
* Computes drill down and sideways counts for the provided
* {@link DrillDownQuery}. Drill sideways counts include
* alternative values/aggregates for the drill-down
* dimensions so that a dimension does not disappear after
* the user drills down into it.
* <p> Use one of the static search
* methods to do the search, and then get the hits and facet
* results from the returned {@link DrillSidewaysResult}.
* <p><b>NOTE</b>: this allocates one {@link
* FacetsCollector} for each drill-down, plus one. If your
* index has high number of facet labels then this will
* multiply your memory usage.
*
* @lucene.experimental
*/
public class DrillSideways {
/**
* {@link IndexSearcher} passed to constructor.
*/
protected final IndexSearcher searcher;
/**
* {@link TaxonomyReader} passed to constructor.
*/
protected final TaxonomyReader taxoReader;
/**
* {@link SortedSetDocValuesReaderState} passed to
* constructor; can be null.
*/
protected final SortedSetDocValuesReaderState state;
/**
* {@link FacetsConfig} passed to constructor.
*/
protected final FacetsConfig config;
// These are only used for multi-threaded search
private final ExecutorService executor;
/**
* Create a new {@code DrillSideways} instance.
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) {
this(searcher, config, taxoReader, null);
}
/**
* Create a new {@code DrillSideways} instance, assuming the categories were
* indexed with {@link SortedSetDocValuesFacetField}.
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) {
this(searcher, config, null, state);
}
/**
* Create a new {@code DrillSideways} instance, where some
* dimensions were indexed with {@link
* SortedSetDocValuesFacetField} and others were indexed
* with {@link FacetField}.
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader,
SortedSetDocValuesReaderState state) {
this(searcher, config, taxoReader, state, null);
}
/**
* Create a new {@code DrillSideways} instance, where some
* dimensions were indexed with {@link
* SortedSetDocValuesFacetField} and others were indexed
* with {@link FacetField}.
* <p>
* Use this constructor to use the concurrent implementation and/or the CollectorManager
*/
public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader,
SortedSetDocValuesReaderState state, ExecutorService executor) {
this.searcher = searcher;
this.config = config;
this.taxoReader = taxoReader;
this.state = state;
this.executor = executor;
}
/**
* Subclass can override to customize per-dim Facets
* impl.
*/
protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways,
String[] drillSidewaysDims) throws IOException {
Facets drillDownFacets;
Map<String, Facets> drillSidewaysFacets = new HashMap<>();
if (taxoReader != null) {
drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillDowns);
if (drillSideways != null) {
for (int i = 0; i < drillSideways.length; i++) {
drillSidewaysFacets.put(drillSidewaysDims[i],
new FastTaxonomyFacetCounts(taxoReader, config, drillSideways[i]));
}
}
} else {
drillDownFacets = new SortedSetDocValuesFacetCounts(state, drillDowns);
if (drillSideways != null) {
for (int i = 0; i < drillSideways.length; i++) {
drillSidewaysFacets.put(drillSidewaysDims[i], new SortedSetDocValuesFacetCounts(state, drillSideways[i]));
}
}
}
if (drillSidewaysFacets.isEmpty()) {
return drillDownFacets;
} else {
return new MultiFacets(drillSidewaysFacets, drillDownFacets);
}
}
/**
* Search, collecting hits with a {@link Collector}, and
* computing drill down and sideways counts.
*/
public DrillSidewaysResult search(DrillDownQuery query, Collector hitCollector) throws IOException {
Map<String, Integer> drillDownDims = query.getDims();
FacetsCollector drillDownCollector = new FacetsCollector();
if (drillDownDims.isEmpty()) {
// There are no drill-down dims, so there is no
// drill-sideways to compute:
searcher.search(query, MultiCollector.wrap(hitCollector, drillDownCollector));
return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, null, null), null);
}
Query baseQuery = query.getBaseQuery();
if (baseQuery == null) {
// TODO: we could optimize this pure-browse case by
// making a custom scorer instead:
baseQuery = new MatchAllDocsQuery();
}
Query[] drillDownQueries = query.getDrillDownQueries();
FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
for (int i = 0; i < drillSidewaysCollectors.length; i++) {
drillSidewaysCollectors[i] = new FacetsCollector();
}
DrillSidewaysQuery dsq =
new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries,
scoreSubDocsAtOnce());
if (hitCollector.scoreMode().needsScores() == false) {
// this is a horrible hack in order to make sure IndexSearcher will not
// attempt to cache the DrillSidewaysQuery
hitCollector = new FilterCollector(hitCollector) {
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE;
}
};
}
searcher.search(dsq, hitCollector);
return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors,
drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null);
}
/**
* Search, sorting by {@link Sort}, and computing
* drill down and sideways counts.
*/
public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort,
boolean doDocScores) throws IOException {
if (filter != null) {
query = new DrillDownQuery(config, filter, query);
}
if (sort != null) {
int limit = searcher.getIndexReader().maxDoc();
if (limit == 0) {
limit = 1; // the collector does not alow numHits = 0
}
final int fTopN = Math.min(topN, limit);
if (executor != null) { // We have an executor, let use the multi-threaded version
final CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager =
new CollectorManager<TopFieldCollector, TopFieldDocs>() {
@Override
public TopFieldCollector newCollector() throws IOException {
return TopFieldCollector.create(sort, fTopN, after, Integer.MAX_VALUE);
}
@Override
public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
final TopFieldDocs[] topFieldDocs = new TopFieldDocs[collectors.size()];
int pos = 0;
for (TopFieldCollector collector : collectors)
topFieldDocs[pos++] = collector.topDocs();
return TopDocs.merge(sort, topN, topFieldDocs);
}
};
ConcurrentDrillSidewaysResult<TopFieldDocs> r = search(query, collectorManager);
TopFieldDocs topDocs = r.collectorResult;
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, query);
}
return new DrillSidewaysResult(r.facets, topDocs);
} else {
final TopFieldCollector hitCollector =
TopFieldCollector.create(sort, fTopN, after, Integer.MAX_VALUE);
DrillSidewaysResult r = search(query, hitCollector);
TopFieldDocs topDocs = hitCollector.topDocs();
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, query);
}
return new DrillSidewaysResult(r.facets, topDocs);
}
} else {
return search(after, query, topN);
}
}
/**
* Search, sorting by score, and computing
* drill down and sideways counts.
*/
public DrillSidewaysResult search(DrillDownQuery query, int topN) throws IOException {
return search(null, query, topN);
}
/**
* Search, sorting by score, and computing
* drill down and sideways counts.
*/
public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException {
int limit = searcher.getIndexReader().maxDoc();
if (limit == 0) {
limit = 1; // the collector does not alow numHits = 0
}
final int fTopN = Math.min(topN, limit);
if (executor != null) { // We have an executor, let use the multi-threaded version
final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
new CollectorManager<TopScoreDocCollector, TopDocs>() {
@Override
public TopScoreDocCollector newCollector() throws IOException {
return TopScoreDocCollector.create(fTopN, after, Integer.MAX_VALUE);
}
@Override
public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
final TopDocs[] topDocs = new TopDocs[collectors.size()];
int pos = 0;
for (TopScoreDocCollector collector : collectors)
topDocs[pos++] = collector.topDocs();
return TopDocs.merge(topN, topDocs);
}
};
ConcurrentDrillSidewaysResult<TopDocs> r = search(query, collectorManager);
return new DrillSidewaysResult(r.facets, r.collectorResult);
} else {
TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after, Integer.MAX_VALUE);
DrillSidewaysResult r = search(query, hitCollector);
return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
}
}
/**
* Override this and return true if your collector
* (e.g., {@code ToParentBlockJoinCollector}) expects all
* sub-scorers to be positioned on the document being
* collected. This will cause some performance loss;
* default is false.
*/
protected boolean scoreSubDocsAtOnce() {
return false;
}
/**
* Result of a drill sideways search, including the
* {@link Facets} and {@link TopDocs}.
*/
public static class DrillSidewaysResult {
/**
* Combined drill down and sideways results.
*/
public final Facets facets;
/**
* Hits.
*/
public final TopDocs hits;
/**
* Sole constructor.
*/
public DrillSidewaysResult(Facets facets, TopDocs hits) {
this.facets = facets;
this.hits = hits;
}
}
private static class CallableCollector implements Callable<CallableResult> {
private final int pos;
private final IndexSearcher searcher;
private final Query query;
private final CollectorManager<?, ?> collectorManager;
private CallableCollector(int pos, IndexSearcher searcher, Query query, CollectorManager<?, ?> collectorManager) {
this.pos = pos;
this.searcher = searcher;
this.query = query;
this.collectorManager = collectorManager;
}
@Override
public CallableResult call() throws Exception {
return new CallableResult(pos, searcher.search(query, collectorManager));
}
}
private static class CallableResult {
private final int pos;
private final Object result;
private CallableResult(int pos, Object result) {
this.pos = pos;
this.result = result;
}
}
private DrillDownQuery getDrillDownQuery(final DrillDownQuery query, Query[] queries,
final String excludedDimension) {
final DrillDownQuery ddl = new DrillDownQuery(config, query.getBaseQuery());
query.getDims().forEach((dim, pos) -> {
if (!dim.equals(excludedDimension))
ddl.add(dim, queries[pos]);
});
return ddl.getDims().size() == queries.length ? null : ddl;
}
/** Runs a search, using a {@link CollectorManager} to gather and merge search results */
public <R> ConcurrentDrillSidewaysResult<R> search(final DrillDownQuery query,
final CollectorManager<?, R> hitCollectorManager) throws IOException {
final Map<String, Integer> drillDownDims = query.getDims();
final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1);
// Add the main DrillDownQuery
callableCollectors.add(new CallableCollector(-1, searcher, query,
new MultiCollectorManager(new FacetsCollectorManager(), hitCollectorManager)));
int i = 0;
final Query[] filters = query.getDrillDownQueries();
for (String dim : drillDownDims.keySet())
callableCollectors.add(new CallableCollector(i++, searcher, getDrillDownQuery(query, filters, dim),
new FacetsCollectorManager()));
final FacetsCollector mainFacetsCollector;
final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()];
final R collectorResult;
try {
// Run the query pool
final List<Future<CallableResult>> futures = executor.invokeAll(callableCollectors);
// Extract the results
final Object[] mainResults = (Object[]) futures.get(0).get().result;
mainFacetsCollector = (FacetsCollector) mainResults[0];
collectorResult = (R) mainResults[1];
for (i = 1; i < futures.size(); i++) {
final CallableResult result = futures.get(i).get();
facetsCollectors[result.pos] = (FacetsCollector) result.result;
}
// Fill the null results with the mainFacetsCollector
for (i = 0; i < facetsCollectors.length; i++)
if (facetsCollectors[i] == null)
facetsCollectors[i] = mainFacetsCollector;
} catch (InterruptedException e) {
throw new ThreadInterruptedException(e);
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
// build the facets and return the result
return new ConcurrentDrillSidewaysResult<>(buildFacetsResult(mainFacetsCollector, facetsCollectors,
drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null, collectorResult);
}
/**
* Result of a concurrent drill sideways search, including the
* {@link Facets} and {@link TopDocs}.
*/
public static class ConcurrentDrillSidewaysResult<R> extends DrillSidewaysResult {
/** The merged search results */
public final R collectorResult;
/**
* Sole constructor.
*/
ConcurrentDrillSidewaysResult(Facets facets, TopDocs hits, R collectorResult) {
super(facets, hits);
this.collectorResult = collectorResult;
}
}
}