| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search; |
| |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.concurrent.CompletableFuture; |
| import java.util.concurrent.ExecutionException; |
| import java.util.concurrent.Executor; |
| import java.util.concurrent.Future; |
| import java.util.concurrent.FutureTask; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexReaderContext; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.ReaderUtil; |
| import org.apache.lucene.index.StoredFieldVisitor; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermStates; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.search.similarities.BM25Similarity; |
| import org.apache.lucene.search.similarities.Similarity; |
| import org.apache.lucene.store.NIOFSDirectory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.ThreadInterruptedException; |
| |
| /** Implements search over a single IndexReader. |
| * |
| * <p>Applications usually need only call the inherited |
| * {@link #search(Query,int)} method. For |
| * performance reasons, if your index is unchanging, you |
| * should share a single IndexSearcher instance across |
| * multiple searches instead of creating a new one |
| * per-search. If your index has changed and you wish to |
| * see the changes reflected in searching, you should |
| * use {@link DirectoryReader#openIfChanged(DirectoryReader)} |
| * to obtain a new reader and |
| * then create a new IndexSearcher from that. Also, for |
| * low-latency turnaround it's best to use a near-real-time |
| * reader ({@link DirectoryReader#open(IndexWriter)}). |
| * Once you have a new {@link IndexReader}, it's relatively |
| * cheap to create a new IndexSearcher from it. |
| * |
| * <p><b>NOTE</b>: The {@link #search} and {@link #searchAfter} methods are |
| * configured to only count top hits accurately up to {@code 1,000} and may |
| * return a {@link TotalHits.Relation lower bound} of the hit count if the |
| * hit count is greater than or equal to {@code 1,000}. On queries that match |
| * lots of documents, counting the number of hits may take much longer than |
| * computing the top hits so this trade-off allows to get some minimal |
| * information about the hit count without slowing down search too much. The |
| * {@link TopDocs#scoreDocs} array is always accurate however. If this behavior |
| * doesn't suit your needs, you should create collectors manually with either |
| * {@link TopScoreDocCollector#create} or {@link TopFieldCollector#create} and |
| * call {@link #search(Query, Collector)}. |
| * |
| * <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link |
| * IndexSearcher}</code> instances are completely |
| * thread safe, meaning multiple threads can call any of its |
| * methods, concurrently. If your application requires |
| * external synchronization, you should <b>not</b> |
| * synchronize on the <code>IndexSearcher</code> instance; |
| * use your own (non-Lucene) objects instead.</p> |
| */ |
public class IndexSearcher {

  // Process-wide defaults applied to every new IndexSearcher; individual
  // instances may override them via setQueryCache/setQueryCachingPolicy.
  private static QueryCache DEFAULT_QUERY_CACHE;
  private static QueryCachingPolicy DEFAULT_CACHING_POLICY = new UsageTrackingQueryCachingPolicy();
  static {
    final int maxCachedQueries = 1000;
    // min of 32MB or 5% of the heap size
    final long maxRamBytesUsed = Math.min(1L << 25, Runtime.getRuntime().maxMemory() / 20);
    DEFAULT_QUERY_CACHE = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
  }
  /**
   * By default we count hits accurately up to 1000. This makes sure that we
   * don't spend most time on computing hit counts
   */
  private static final int TOTAL_HITS_THRESHOLD = 1000;

  final IndexReader reader; // package private for testing!

  // NOTE: these members might change in incompatible ways
  // in the next release
  protected final IndexReaderContext readerContext;
  protected final List<LeafReaderContext> leafContexts;
  /** used with executor - each slice holds a set of leafs executed within one thread */
  private final LeafSlice[] leafSlices;

  // These are only used for multi-threaded search; null when no Executor was
  // provided at construction time.
  private final Executor executor;

  // the default Similarity
  private static final Similarity defaultSimilarity = new BM25Similarity();

  // Per-instance cache and policy, initialized from the process-wide defaults.
  private QueryCache queryCache = DEFAULT_QUERY_CACHE;
  private QueryCachingPolicy queryCachingPolicy = DEFAULT_CACHING_POLICY;
| |
  /**
   * Expert: returns a default Similarity instance.
   * In general, this method is only called to initialize searchers and writers.
   * User code and query implementations should respect
   * {@link IndexSearcher#getSimilarity()}.
   * @lucene.internal
   */
  public static Similarity getDefaultSimilarity() {
    return defaultSimilarity;
  }

  /**
   * Expert: Get the default {@link QueryCache} or {@code null} if the cache is disabled.
   * @lucene.internal
   */
  public static QueryCache getDefaultQueryCache() {
    return DEFAULT_QUERY_CACHE;
  }

  /**
   * Expert: set the default {@link QueryCache} instance.
   * Only affects searchers created after this call; a {@code null} value
   * disables caching for them.
   * @lucene.internal
   */
  public static void setDefaultQueryCache(QueryCache defaultQueryCache) {
    DEFAULT_QUERY_CACHE = defaultQueryCache;
  }

  /**
   * Expert: Get the default {@link QueryCachingPolicy}.
   * @lucene.internal
   */
  public static QueryCachingPolicy getDefaultQueryCachingPolicy() {
    return DEFAULT_CACHING_POLICY;
  }

  /**
   * Expert: set the default {@link QueryCachingPolicy} instance.
   * Only affects searchers created after this call.
   * @lucene.internal
   */
  public static void setDefaultQueryCachingPolicy(QueryCachingPolicy defaultQueryCachingPolicy) {
    DEFAULT_CACHING_POLICY = defaultQueryCachingPolicy;
  }
| |
  /** The Similarity implementation used by this searcher. */
  private Similarity similarity = defaultSimilarity;

  /** Creates a searcher searching the provided index. */
  public IndexSearcher(IndexReader r) {
    this(r, null);
  }

  /**
   * Runs searches for each segment separately, using the provided Executor. NOTE: if you are using
   * {@link NIOFSDirectory}, do not use the shutdownNow method of ExecutorService as this uses
   * Thread.interrupt under-the-hood which can silently close file descriptors (see <a href=
   * "https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
   *
   * @lucene.experimental
   */
  public IndexSearcher(IndexReader r, Executor executor) {
    this(r.getContext(), executor);
  }

  /**
   * Creates a searcher searching the provided top-level {@link IndexReaderContext}.
   *
   * <p>Given a non-<code>null</code> {@link Executor} this method runs searches for each segment
   * separately, using the provided Executor. NOTE: if you are using {@link NIOFSDirectory}, do not
   * use the shutdownNow method of ExecutorService as this uses Thread.interrupt under-the-hood
   * which can silently close file descriptors (see <a href=
   * "https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
   *
   * @see IndexReaderContext
   * @see IndexReader#getContext()
   * @lucene.experimental
   */
  public IndexSearcher(IndexReaderContext context, Executor executor) {
    assert context.isTopLevel: "IndexSearcher's ReaderContext must be topLevel for reader" + context.reader();
    reader = context.reader();
    this.executor = executor;
    this.readerContext = context;
    leafContexts = context.leaves();
    // Slices are only needed for the concurrent search path.
    this.leafSlices = executor == null ? null : slices(leafContexts);
  }

  /**
   * Creates a searcher searching the provided top-level {@link IndexReaderContext}.
   *
   * @see IndexReaderContext
   * @see IndexReader#getContext()
   * @lucene.experimental
   */
  public IndexSearcher(IndexReaderContext context) {
    this(context, null);
  }
| |
  /**
   * Set the {@link QueryCache} to use when scores are not needed.
   * A value of {@code null} indicates that query matches should never be
   * cached. This method should be called <b>before</b> starting using this
   * {@link IndexSearcher}.
   * <p>NOTE: When using a query cache, queries should not be modified after
   * they have been passed to IndexSearcher.
   * @see QueryCache
   * @lucene.experimental
   */
  public void setQueryCache(QueryCache queryCache) {
    this.queryCache = queryCache;
  }

  /**
   * Return the query cache of this {@link IndexSearcher}. This will be either
   * the {@link #getDefaultQueryCache() default query cache} or the query cache
   * that was last set through {@link #setQueryCache(QueryCache)}. A return
   * value of {@code null} indicates that caching is disabled.
   * @lucene.experimental
   */
  public QueryCache getQueryCache() {
    return queryCache;
  }

  /**
   * Set the {@link QueryCachingPolicy} to use for query caching.
   * This method should be called <b>before</b> starting using this
   * {@link IndexSearcher}.
   * @see QueryCachingPolicy
   * @lucene.experimental
   */
  public void setQueryCachingPolicy(QueryCachingPolicy queryCachingPolicy) {
    this.queryCachingPolicy = Objects.requireNonNull(queryCachingPolicy);
  }

  /**
   * Return the query caching policy of this {@link IndexSearcher}. This will
   * be either the {@link #getDefaultQueryCachingPolicy() default policy} or
   * the policy that was last set through
   * {@link #setQueryCachingPolicy(QueryCachingPolicy)}.
   * @lucene.experimental
   */
  public QueryCachingPolicy getQueryCachingPolicy() {
    return queryCachingPolicy;
  }
| |
| /** |
| * Expert: Creates an array of leaf slices each holding a subset of the given leaves. |
| * Each {@link LeafSlice} is executed in a single thread. By default there |
| * will be one {@link LeafSlice} per leaf ({@link org.apache.lucene.index.LeafReaderContext}). |
| */ |
| protected LeafSlice[] slices(List<LeafReaderContext> leaves) { |
| LeafSlice[] slices = new LeafSlice[leaves.size()]; |
| for (int i = 0; i < slices.length; i++) { |
| slices[i] = new LeafSlice(leaves.get(i)); |
| } |
| return slices; |
| } |
| |
  /** Return the {@link IndexReader} this searches. */
  public IndexReader getIndexReader() {
    return reader;
  }

  /**
   * Sugar for <code>.getIndexReader().document(docID)</code>
   * @see IndexReader#document(int)
   */
  public Document doc(int docID) throws IOException {
    return reader.document(docID);
  }

  /**
   * Sugar for <code>.getIndexReader().document(docID, fieldVisitor)</code>
   * @see IndexReader#document(int, StoredFieldVisitor)
   */
  public void doc(int docID, StoredFieldVisitor fieldVisitor) throws IOException {
    reader.document(docID, fieldVisitor);
  }

  /**
   * Sugar for <code>.getIndexReader().document(docID, fieldsToLoad)</code>
   * @see IndexReader#document(int, Set)
   */
  public Document doc(int docID, Set<String> fieldsToLoad) throws IOException {
    return reader.document(docID, fieldsToLoad);
  }

  /** Expert: Set the Similarity implementation used by this IndexSearcher.
   * This method should be called before starting to use this searcher;
   * no null check is performed here.
   */
  public void setSimilarity(Similarity similarity) {
    this.similarity = similarity;
  }

  /** Expert: Get the {@link Similarity} to use to compute scores. This returns the
   * {@link Similarity} that has been set through {@link #setSimilarity(Similarity)}
   * or the default {@link Similarity} if none has been set explicitly. */
  public Similarity getSimilarity() {
    return similarity;
  }
| |
| /** |
| * Count how many documents match the given query. |
| */ |
| public int count(Query query) throws IOException { |
| query = rewrite(query); |
| while (true) { |
| // remove wrappers that don't matter for counts |
| if (query instanceof ConstantScoreQuery) { |
| query = ((ConstantScoreQuery) query).getQuery(); |
| } else { |
| break; |
| } |
| } |
| |
| // some counts can be computed in constant time |
| if (query instanceof MatchAllDocsQuery) { |
| return reader.numDocs(); |
| } else if (query instanceof TermQuery && reader.hasDeletions() == false) { |
| Term term = ((TermQuery) query).getTerm(); |
| int count = 0; |
| for (LeafReaderContext leaf : reader.leaves()) { |
| count += leaf.reader().docFreq(term); |
| } |
| return count; |
| } |
| |
| // general case: create a collector and count matches |
| final CollectorManager<TotalHitCountCollector, Integer> collectorManager = new CollectorManager<TotalHitCountCollector, Integer>() { |
| |
| @Override |
| public TotalHitCountCollector newCollector() throws IOException { |
| return new TotalHitCountCollector(); |
| } |
| |
| @Override |
| public Integer reduce(Collection<TotalHitCountCollector> collectors) throws IOException { |
| int total = 0; |
| for (TotalHitCountCollector collector : collectors) { |
| total += collector.getTotalHits(); |
| } |
| return total; |
| } |
| |
| }; |
| return search(query, collectorManager); |
| } |
| |
  /** Returns the leaf slices used for concurrent searching, or null if no {@code Executor} was
   * passed to the constructor.
   *
   * @lucene.experimental */
  public LeafSlice[] getSlices() {
    return leafSlices;
  }
| |
  /** Finds the top <code>n</code>
   * hits for <code>query</code> where all results are after a previous
   * result (<code>after</code>).
   * <p>
   * By passing the bottom result from a previous page as <code>after</code>,
   * this method can be used for efficient 'deep-paging' across potentially
   * large result sets.
   *
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOException {
    // maxDoc may be 0 for an empty index; keep limit >= 1 so cappedNumHits > 0
    final int limit = Math.max(1, reader.maxDoc());
    if (after != null && after.doc >= limit) {
      throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc="
          + after.doc + " limit=" + limit);
    }

    final int cappedNumHits = Math.min(numHits, limit);

    final CollectorManager<TopScoreDocCollector, TopDocs> manager = new CollectorManager<TopScoreDocCollector, TopDocs>() {

      // When several slices run concurrently, the threshold checker and the
      // minimum-score accumulator must be shared across collectors so that
      // early termination decisions see global progress.
      private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
          HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));

      private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();

      @Override
      public TopScoreDocCollector newCollector() throws IOException {
        return TopScoreDocCollector.create(cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
      }

      @Override
      public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
        // merge per-slice results into a single top-N list
        final TopDocs[] topDocs = new TopDocs[collectors.size()];
        int i = 0;
        for (TopScoreDocCollector collector : collectors) {
          topDocs[i++] = collector.topDocs();
        }
        return TopDocs.merge(0, cappedNumHits, topDocs, true);
      }

    };

    return search(query, manager);
  }
| |
  /** Finds the top <code>n</code>
   * hits for <code>query</code>.
   *
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  public TopDocs search(Query query, int n)
    throws IOException {
    // equivalent to paging from the very beginning
    return searchAfter(null, query, n);
  }

  /** Lower-level search API.
   *
   * <p>{@link LeafCollector#collect(int)} is called for every matching document.
   *
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  public void search(Query query, Collector results)
    throws IOException {
    query = rewrite(query);
    search(leafContexts, createWeight(query, results.scoreMode(), 1), results);
  }
| |
  /** Search implementation with arbitrary sorting, plus
   * control over whether hit scores should be computed. Finds
   * the top <code>n</code> hits for <code>query</code>, and sorting
   * the hits by the criteria in <code>sort</code>.
   * If <code>doDocScores</code> is <code>true</code>
   * then the score of each hit will be computed and
   * returned.
   *
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  public TopFieldDocs search(Query query, int n,
      Sort sort, boolean doDocScores) throws IOException {
    return searchAfter(null, query, n, sort, doDocScores);
  }

  /**
   * Search implementation with arbitrary sorting.
   * @param query The query to search for
   * @param n Return only the top n results
   * @param sort The {@link org.apache.lucene.search.Sort} object
   * @return The top docs, sorted according to the supplied {@link org.apache.lucene.search.Sort} instance
   * @throws IOException if there is a low-level I/O error
   */
  public TopFieldDocs search(Query query, int n, Sort sort) throws IOException {
    // doDocScores defaults to false: hits carry NaN scores unless requested
    return searchAfter(null, query, n, sort, false);
  }
| |
  /** Finds the top <code>n</code>
   * hits for <code>query</code> where all results are after a previous
   * result (<code>after</code>).
   * <p>
   * By passing the bottom result from a previous page as <code>after</code>,
   * this method can be used for efficient 'deep-paging' across potentially
   * large result sets.
   *
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException {
    return searchAfter(after, query, n, sort, false);
  }

  /** Finds the top <code>n</code>
   * hits for <code>query</code> where all results are after a previous
   * result (<code>after</code>), allowing control over
   * whether hit scores should be computed.
   * <p>
   * By passing the bottom result from a previous page as <code>after</code>,
   * this method can be used for efficient 'deep-paging' across potentially
   * large result sets. If <code>doDocScores</code> is <code>true</code>
   * then the score of each hit will be computed and
   * returned.
   *
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  public TopFieldDocs searchAfter(ScoreDoc after, Query query, int numHits, Sort sort,
      boolean doDocScores) throws IOException {
    if (after != null && !(after instanceof FieldDoc)) {
      // TODO: if we fix type safety of TopFieldDocs we can
      // remove this
      throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
    }
    return searchAfter((FieldDoc) after, query, numHits, sort, doDocScores);
  }
| |
  /**
   * Implementation of the sorted search-after methods above: validates the
   * {@code after} anchor, rewrites the sort against this searcher, collects
   * the top hits per slice, and merges them.
   */
  private TopFieldDocs searchAfter(FieldDoc after, Query query, int numHits, Sort sort,
      boolean doDocScores) throws IOException {
    // maxDoc may be 0 for an empty index; keep limit >= 1 so cappedNumHits > 0
    final int limit = Math.max(1, reader.maxDoc());
    if (after != null && after.doc >= limit) {
      throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc="
          + after.doc + " limit=" + limit);
    }
    final int cappedNumHits = Math.min(numHits, limit);
    final Sort rewrittenSort = sort.rewrite(this);

    final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {

      // When several slices run concurrently, the threshold checker and the
      // minimum-score accumulator must be shared across collectors so that
      // early termination decisions see global progress.
      private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
          HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));

      private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();

      @Override
      public TopFieldCollector newCollector() throws IOException {
        // TODO: don't pay the price for accurate hit counts by default
        return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
      }

      @Override
      public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
        // merge per-slice results into a single sorted top-N list
        final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
        int i = 0;
        for (TopFieldCollector collector : collectors) {
          topDocs[i++] = collector.topDocs();
        }
        return TopDocs.merge(rewrittenSort, 0, cappedNumHits, topDocs, true);
      }

    };

    TopFieldDocs topDocs = search(query, manager);
    if (doDocScores) {
      // sorted collection does not score by default; backfill scores on demand
      TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
    }
    return topDocs;
  }
| |
| /** |
| * Lower-level search API. |
| * Search all leaves using the given {@link CollectorManager}. In contrast |
| * to {@link #search(Query, Collector)}, this method will use the searcher's |
| * {@link Executor} in order to parallelize execution of the collection |
| * on the configured {@link #leafSlices}. |
| * @see CollectorManager |
| * @lucene.experimental |
| */ |
| public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException { |
| if (executor == null || leafSlices.length <= 1) { |
| final C collector = collectorManager.newCollector(); |
| search(query, collector); |
| return collectorManager.reduce(Collections.singletonList(collector)); |
| } else { |
| final List<C> collectors = new ArrayList<>(leafSlices.length); |
| ScoreMode scoreMode = null; |
| for (int i = 0; i < leafSlices.length; ++i) { |
| final C collector = collectorManager.newCollector(); |
| collectors.add(collector); |
| if (scoreMode == null) { |
| scoreMode = collector.scoreMode(); |
| } else if (scoreMode != collector.scoreMode()) { |
| throw new IllegalStateException("CollectorManager does not always produce collectors with the same score mode"); |
| } |
| } |
| if (scoreMode == null) { |
| // no segments |
| scoreMode = ScoreMode.COMPLETE; |
| } |
| query = rewrite(query); |
| final Weight weight = createWeight(query, scoreMode, 1); |
| final List<Future<C>> topDocsFutures = new ArrayList<>(leafSlices.length); |
| for (int i = 0; i < leafSlices.length - 1; ++i) { |
| final LeafReaderContext[] leaves = leafSlices[i].leaves; |
| final C collector = collectors.get(i); |
| FutureTask<C> task = new FutureTask<>(() -> { |
| search(Arrays.asList(leaves), weight, collector); |
| return collector; |
| }); |
| executor.execute(task); |
| topDocsFutures.add(task); |
| } |
| final LeafReaderContext[] leaves = leafSlices[leafSlices.length - 1].leaves; |
| final C collector = collectors.get(leafSlices.length - 1); |
| // execute the last on the caller thread |
| search(Arrays.asList(leaves), weight, collector); |
| topDocsFutures.add(CompletableFuture.completedFuture(collector)); |
| final List<C> collectedCollectors = new ArrayList<>(); |
| for (Future<C> future : topDocsFutures) { |
| try { |
| collectedCollectors.add(future.get()); |
| } catch (InterruptedException e) { |
| throw new ThreadInterruptedException(e); |
| } catch (ExecutionException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| return collectorManager.reduce(collectors); |
| } |
| } |
| |
  /**
   * Lower-level search API.
   *
   * <p>
   * {@link LeafCollector#collect(int)} is called for every document. <br>
   *
   * <p>
   * NOTE: this method executes the searches on all given leaves exclusively.
   * To search across all the searchers leaves use {@link #leafContexts}.
   *
   * @param leaves
   *          the searchers leaves to execute the searches on
   * @param weight
   *          to match documents
   * @param collector
   *          to receive hits
   * @throws BooleanQuery.TooManyClauses If a query would exceed
   *         {@link BooleanQuery#getMaxClauseCount()} clauses.
   */
  protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
      throws IOException {

    // TODO: should we make this
    // threaded...?  the Collector could be sync'd?
    // always use single thread:
    for (LeafReaderContext ctx : leaves) { // search each subreader
      final LeafCollector leafCollector;
      try {
        leafCollector = collector.getLeafCollector(ctx);
      } catch (CollectionTerminatedException e) {
        // there is no doc of interest in this reader context
        // continue with the following leaf
        continue;
      }
      // a null scorer means no documents of this segment can match
      BulkScorer scorer = weight.bulkScorer(ctx);
      if (scorer != null) {
        try {
          // live docs filter out deleted documents during scoring
          scorer.score(leafCollector, ctx.reader().getLiveDocs());
        } catch (CollectionTerminatedException e) {
          // collection was terminated prematurely
          // continue with the following leaf
        }
      }
    }
  }
| |
| /** Expert: called to re-write queries into primitive queries. |
| * @throws BooleanQuery.TooManyClauses If a query would exceed |
| * {@link BooleanQuery#getMaxClauseCount()} clauses. |
| */ |
| public Query rewrite(Query original) throws IOException { |
| Query query = original; |
| for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query; |
| rewrittenQuery = query.rewrite(reader)) { |
| query = rewrittenQuery; |
| } |
| return query; |
| } |
| |
  /** Returns an Explanation that describes how <code>doc</code> scored against
   * <code>query</code>.
   *
   * <p>This is intended to be used in developing Similarity implementations,
   * and, for good performance, should not be displayed with every hit.
   * Computing an explanation is as expensive as executing the query over the
   * entire index.
   */
  public Explanation explain(Query query, int doc) throws IOException {
    query = rewrite(query);
    // explanations always need full scoring information
    return explain(createWeight(query, ScoreMode.COMPLETE, 1), doc);
  }
| |
| /** Expert: low-level implementation method |
| * Returns an Explanation that describes how <code>doc</code> scored against |
| * <code>weight</code>. |
| * |
| * <p>This is intended to be used in developing Similarity implementations, |
| * and, for good performance, should not be displayed with every hit. |
| * Computing an explanation is as expensive as executing the query over the |
| * entire index. |
| * <p>Applications should call {@link IndexSearcher#explain(Query, int)}. |
| * @throws BooleanQuery.TooManyClauses If a query would exceed |
| * {@link BooleanQuery#getMaxClauseCount()} clauses. |
| */ |
| protected Explanation explain(Weight weight, int doc) throws IOException { |
| int n = ReaderUtil.subIndex(doc, leafContexts); |
| final LeafReaderContext ctx = leafContexts.get(n); |
| int deBasedDoc = doc - ctx.docBase; |
| final Bits liveDocs = ctx.reader().getLiveDocs(); |
| if (liveDocs != null && liveDocs.get(deBasedDoc) == false) { |
| return Explanation.noMatch("Document " + doc + " is deleted"); |
| } |
| return weight.explain(ctx, deBasedDoc); |
| } |
| |
  /**
   * Creates a {@link Weight} for the given query, potentially adding caching
   * if possible and configured.
   * Caching is only applied when scores are not needed, since cached matches
   * carry no score information.
   * @lucene.experimental
   */
  public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException {
    // read the field once so a concurrent setQueryCache cannot change it
    // between the null check and the doCache call
    final QueryCache queryCache = this.queryCache;
    Weight weight = query.createWeight(this, scoreMode, boost);
    if (scoreMode.needsScores() == false && queryCache != null) {
      weight = queryCache.doCache(weight, queryCachingPolicy);
    }
    return weight;
  }

  /**
   * Returns this searchers the top-level {@link IndexReaderContext}.
   * @see IndexReader#getContext()
   */
  /* sugar for #getReader().getTopReaderContext() */
  public IndexReaderContext getTopReaderContext() {
    return readerContext;
  }
| |
  /**
   * A class holding a subset of the {@link IndexSearcher}s leaf contexts to be
   * executed within a single thread.
   *
   * @lucene.experimental
   */
  public static class LeafSlice {

    /** The leaves that make up this slice.
     *
     *  @lucene.experimental */
    public final LeafReaderContext[] leaves;

    /** Creates a slice over the given leaves. The array is stored as-is. */
    public LeafSlice(LeafReaderContext... leaves) {
      this.leaves = leaves;
    }
  }
| |
| @Override |
| public String toString() { |
| return "IndexSearcher(" + reader + "; executor=" + executor + ")"; |
| } |
| |
  /**
   * Returns {@link TermStatistics} for a term, or {@code null} if
   * the term does not exist.
   * @deprecated in favor of {@link #termStatistics(Term, int, long)}.
   */
  @Deprecated
  public final TermStatistics termStatistics(Term term, TermStates context) throws IOException {
    if (context.docFreq() == 0) {
      // TermStatistics requires docFreq >= 1, so a missing term maps to null
      return null;
    } else {
      return termStatistics(term, context.docFreq(), context.totalTermFreq());
    }
  }

  /**
   * Returns {@link TermStatistics} for a term.
   * <p>
   * This can be overridden for example, to return a term's statistics
   * across a distributed collection.
   *
   * @param docFreq The document frequency of the term. It must be greater or equal to 1.
   * @param totalTermFreq The total term frequency.
   * @return A {@link TermStatistics} (never null).
   * @lucene.experimental
   */
  public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
    // This constructor will throw an exception if docFreq <= 0.
    return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
  }
| |
| /** |
| * Returns {@link CollectionStatistics} for a field, or {@code null} if |
| * the field does not exist (has no indexed terms) |
| * |
| * This can be overridden for example, to return a field's statistics |
| * across a distributed collection. |
| * @lucene.experimental |
| */ |
| public CollectionStatistics collectionStatistics(String field) throws IOException { |
| assert field != null; |
| long docCount = 0; |
| long sumTotalTermFreq = 0; |
| long sumDocFreq = 0; |
| for (LeafReaderContext leaf : reader.leaves()) { |
| final Terms terms = leaf.reader().terms(field); |
| if (terms == null) { |
| continue; |
| } |
| docCount += terms.getDocCount(); |
| sumTotalTermFreq += terms.getSumTotalTermFreq(); |
| sumDocFreq += terms.getSumDocFreq(); |
| } |
| if (docCount == 0) { |
| return null; |
| } |
| return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq); |
| } |
| |
  /**
   * Returns this searchers executor or <code>null</code> if no executor was provided
   */
  public Executor getExecutor() {
    return executor;
  }
| } |