| using Lucene.Net.Diagnostics; |
| using Lucene.Net.Support; |
| using Lucene.Net.Support.Threading; |
| using System; |
| using System.Collections; |
| using System.Collections.Generic; |
| using System.IO; |
| using System.Threading; |
| using System.Threading.Tasks; |
| |
| namespace Lucene.Net.Search |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; |
| using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity; |
| using Document = Documents.Document; |
| using IndexReader = Lucene.Net.Index.IndexReader; |
| using IndexReaderContext = Lucene.Net.Index.IndexReaderContext; |
| using MultiFields = Lucene.Net.Index.MultiFields; |
| using ReaderUtil = Lucene.Net.Index.ReaderUtil; |
| using Similarity = Lucene.Net.Search.Similarities.Similarity; |
| using StoredFieldVisitor = Lucene.Net.Index.StoredFieldVisitor; |
| using Term = Lucene.Net.Index.Term; |
| using TermContext = Lucene.Net.Index.TermContext; |
| using Terms = Lucene.Net.Index.Terms; |
| |
| /// <summary> |
| /// Implements search over a single <see cref="Index.IndexReader"/>. |
| /// |
| /// <para/>Applications usually need only call the inherited |
| /// <see cref="Search(Query,int)"/> |
| /// or <see cref="Search(Query,Filter,int)"/> methods. For |
| /// performance reasons, if your index is unchanging, you |
| /// should share a single <see cref="IndexSearcher"/> instance across |
| /// multiple searches instead of creating a new one |
| /// per-search. If your index has changed and you wish to |
| /// see the changes reflected in searching, you should |
| /// use <see cref="Index.DirectoryReader.OpenIfChanged(Index.DirectoryReader)"/> |
| /// to obtain a new reader and |
| /// then create a new <see cref="IndexSearcher"/> from that. Also, for |
| /// low-latency turnaround it's best to use a near-real-time |
| /// reader (<see cref="Index.DirectoryReader.Open(Index.IndexWriter,bool)"/>). |
| /// Once you have a new <see cref="Index.IndexReader"/>, it's relatively |
| /// cheap to create a new <see cref="IndexSearcher"/> from it. |
| /// |
| /// <para/><a name="thread-safety"></a><p><b>NOTE</b>: |
| /// <see cref="IndexSearcher"/> instances are completely |
| /// thread safe, meaning multiple threads can call any of its |
| /// methods, concurrently. If your application requires |
| /// external synchronization, you should <b>not</b> |
| /// synchronize on the <see cref="IndexSearcher"/> instance; |
| /// use your own (non-Lucene) objects instead.</p> |
| /// </summary> |
| public class IndexSearcher |
| { |
| internal readonly IndexReader reader; // internal rather than private so tests can reach it |
| |
| // NOTE: these members might change in incompatible ways |
| // in the next release |
| |
| /// <summary> |
| /// The top-level reader context this searcher was constructed with. </summary> |
| protected readonly IndexReaderContext m_readerContext; |
| |
| /// <summary> |
| /// The leaves of <see cref="m_readerContext"/>, captured once at construction time. </summary> |
| protected internal readonly IList<AtomicReaderContext> m_leafContexts; |
| |
| /// <summary> |
| /// Used with executor - each slice holds a set of leaves executed within one thread. |
| /// This is <c>null</c> when the searcher was created without a <see cref="TaskScheduler"/>. </summary> |
| protected readonly LeafSlice[] m_leafSlices; |
| |
| // Only used for multi-threaded search; when null the search methods run |
| // directly on the calling thread. |
| private readonly TaskScheduler executor; |
| |
| // The default Similarity, shared by every searcher until one is assigned explicitly. |
| private static readonly Similarity defaultSimilarity = new DefaultSimilarity(); |
| |
| /// <summary> |
| /// Expert: returns a default <see cref="Similarities.Similarity"/> instance. |
| /// In general, this method is only called to initialize searchers and writers. |
| /// User code and query implementations should respect |
| /// <see cref="IndexSearcher.Similarity"/>. |
| /// <para/> |
| /// @lucene.internal |
| /// </summary> |
| public static Similarity DefaultSimilarity => defaultSimilarity; |
| |
| /// <summary> |
| /// The <see cref="Similarities.Similarity"/> implementation used by this searcher. |
| /// Starts out as the shared default instance. </summary> |
| private Similarity similarity = defaultSimilarity; |
| |
| /// <summary> |
| /// Creates a searcher over the provided index reader, without a |
| /// <see cref="TaskScheduler"/> (the scheduler argument is passed as <c>null</c>). |
| /// </summary> |
| /// <param name="r"> The reader to search. </param> |
| public IndexSearcher(IndexReader r) |
|     : this(r, executor: null) |
| { |
| } |
| |
| /// <summary> |
| /// Runs searches for each segment separately, using the |
| /// provided <see cref="TaskScheduler"/>. <see cref="IndexSearcher"/> will not |
| /// shut down or wait on this <see cref="TaskScheduler"/>; |
| /// you must do so, eventually, on your own. |
| /// <para/> |
| /// @lucene.experimental |
| /// </summary> |
| /// <param name="r"> The reader to search; its <see cref="IndexReader.Context"/> is used. </param> |
| /// <param name="executor"> Scheduler for per-slice searches, or <c>null</c> to search without one. </param> |
| /// <exception cref="ArgumentNullException"> If <paramref name="r"/> is <c>null</c>. </exception> |
| public IndexSearcher(IndexReader r, TaskScheduler executor) |
|     // Fail with a named argument instead of a bare NullReferenceException on r.Context. |
|     : this((r ?? throw new ArgumentNullException(nameof(r))).Context, executor) |
| { |
| } |
| |
| /// <summary> |
| /// Creates a searcher searching the provided top-level <see cref="IndexReaderContext"/>. |
| /// <para/> |
| /// Given a non-<c>null</c> <see cref="TaskScheduler"/> this method runs |
| /// searches for each segment separately, using the provided <see cref="TaskScheduler"/>. |
| /// <see cref="IndexSearcher"/> will not shut down or wait on this <see cref="TaskScheduler"/>; |
| /// you must do so, eventually, on your own. |
| /// <para/> |
| /// @lucene.experimental |
| /// </summary> |
| /// <param name="context"> The top-level reader context to search. </param> |
| /// <param name="executor"> Scheduler for per-slice searches, or <c>null</c> to search without one. </param> |
| /// <exception cref="ArgumentNullException"> If <paramref name="context"/> is <c>null</c>. </exception> |
| /// <seealso cref="IndexReaderContext"/> |
| /// <seealso cref="IndexReader.Context"/> |
| public IndexSearcher(IndexReaderContext context, TaskScheduler executor) |
| { |
|     if (context == null) |
|     { |
|         // Previously a null context surfaced as a NullReferenceException on the first member access. |
|         throw new ArgumentNullException(nameof(context)); |
|     } |
|     if (Debugging.AssertsEnabled) Debugging.Assert(context.IsTopLevel,"IndexSearcher's ReaderContext must be topLevel for reader {0}", context.Reader); |
| |
|     reader = context.Reader; |
|     this.executor = executor; |
|     this.m_readerContext = context; |
|     m_leafContexts = context.Leaves; |
| |
|     // Slices are only needed when a scheduler is present. |
|     // NOTE: Slices is virtual and is invoked from this constructor, so an override |
|     // runs before the overriding class's own constructor body has executed. |
|     this.m_leafSlices = executor == null ? null : Slices(m_leafContexts); |
| } |
| |
| /// <summary> |
| /// Creates a searcher searching the provided top-level <see cref="IndexReaderContext"/>, |
| /// without a <see cref="TaskScheduler"/> (the scheduler argument is passed as <c>null</c>). |
| /// <para/> |
| /// @lucene.experimental |
| /// </summary> |
| /// <param name="context"> The top-level reader context to search. </param> |
| /// <seealso cref="IndexReaderContext"/> |
| /// <seealso cref="IndexReader.Context"/> |
| public IndexSearcher(IndexReaderContext context) |
|     : this(context, executor: null) |
| { |
| } |
| |
| /// <summary> |
| /// Expert: Creates an array of leaf slices each holding a subset of the given leaves. |
| /// Each <see cref="LeafSlice"/> is executed in a single thread. This default |
| /// implementation produces one <see cref="LeafSlice"/> per leaf |
| /// (<see cref="AtomicReaderContext"/>), in the same order as <paramref name="leaves"/>. |
| /// </summary> |
| /// <param name="leaves"> The leaves to partition. </param> |
| /// <returns> An array with exactly <c>leaves.Count</c> single-leaf slices. </returns> |
| protected virtual LeafSlice[] Slices(IList<AtomicReaderContext> leaves) |
| { |
|     int leafCount = leaves.Count; |
|     var result = new LeafSlice[leafCount]; |
| |
|     int index = 0; |
|     while (index < leafCount) |
|     { |
|         // One slice per leaf. |
|         result[index] = new LeafSlice(leaves[index]); |
|         index++; |
|     } |
| |
|     return result; |
| } |
| |
| /// <summary> |
| /// Gets the <see cref="Index.IndexReader"/> this searcher operates on. |
| /// </summary> |
| public virtual IndexReader IndexReader |
| { |
|     get { return reader; } |
| } |
| |
| /// <summary> |
| /// Sugar for <c>.IndexReader.Document(docID)</c>. |
| /// </summary> |
| /// <param name="docID"> The document number to load. </param> |
| /// <returns> Whatever the underlying reader returns for <paramref name="docID"/>. </returns> |
| /// <seealso cref="IndexReader.Document(int)"/> |
| public virtual Document Doc(int docID) => reader.Document(docID); |
| |
| /// <summary> |
| /// Sugar for <c>.IndexReader.Document(docID, fieldVisitor)</c>. |
| /// </summary> |
| /// <param name="docID"> The document number to visit. </param> |
| /// <param name="fieldVisitor"> The visitor handed to the underlying reader. </param> |
| /// <seealso cref="IndexReader.Document(int, StoredFieldVisitor)"/> |
| public virtual void Doc(int docID, StoredFieldVisitor fieldVisitor) => reader.Document(docID, fieldVisitor); |
| |
| /// <summary> |
| /// Sugar for <c>.IndexReader.Document(docID, fieldsToLoad)</c>. |
| /// </summary> |
| /// <param name="docID"> The document number to load. </param> |
| /// <param name="fieldsToLoad"> The field-name set handed to the underlying reader. </param> |
| /// <returns> Whatever the underlying reader returns for the given arguments. </returns> |
| /// <seealso cref="IndexReader.Document(int, ISet{string})"/> |
| public virtual Document Doc(int docID, ISet<string> fieldsToLoad) => reader.Document(docID, fieldsToLoad); |
| |
| /// <summary> |
| /// Obsolete alias that forwards to <see cref="Doc(int, ISet{string})"/>. |
| /// </summary> |
| /// <param name="docID"> The document number to load. </param> |
| /// <param name="fieldsToLoad"> The field-name set passed through to <see cref="Doc(int, ISet{string})"/>. </param> |
| /// <returns> The result of <see cref="Doc(int, ISet{string})"/>. </returns> |
| // The compiler shows this message verbatim, so it names the .NET replacement |
| // rather than the original Java signature. |
| [Obsolete("Use Doc(int, ISet<string>) instead.")] |
| public Document Document(int docID, ISet<string> fieldsToLoad) |
| { |
|     return Doc(docID, fieldsToLoad); |
| } |
| |
| /// <summary> |
| /// Expert: Gets or sets the <see cref="Similarities.Similarity"/> implementation used by this |
| /// <see cref="IndexSearcher"/>. |
| /// </summary> |
| public virtual Similarity Similarity |
| { |
|     get { return similarity; } |
|     set { similarity = value; } |
| } |
| |
| /// <summary> |
| /// Combines <paramref name="query"/> with <paramref name="filter"/>: returns the query |
| /// unchanged when the filter is <c>null</c>, otherwise a new <see cref="FilteredQuery"/> |
| /// built from both. |
| /// <para/> |
| /// @lucene.internal |
| /// </summary> |
| protected virtual Query WrapFilter(Query query, Filter filter) |
| { |
|     if (filter == null) |
|     { |
|         return query; |
|     } |
| |
|     return new FilteredQuery(query, filter); |
| } |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/> where all results are after a previous |
| /// result (<paramref name="after"/>). |
| /// <para/> |
| /// By passing the bottom result from a previous page as <paramref name="after"/>, |
| /// this method can be used for efficient 'deep-paging' across potentially |
| /// large result sets. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs SearchAfter(ScoreDoc after, Query query, int n) |
| { |
|     Weight normalizedWeight = CreateNormalizedWeight(query); |
|     return Search(normalizedWeight, after, n); |
| } |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/>, applying <paramref name="filter"/> if non-null, |
| /// where all results are after a previous result (<paramref name="after"/>). |
| /// <para/> |
| /// By passing the bottom result from a previous page as <paramref name="after"/>, |
| /// this method can be used for efficient 'deep-paging' across potentially |
| /// large result sets. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs SearchAfter(ScoreDoc after, Query query, Filter filter, int n) |
| { |
|     Query filtered = WrapFilter(query, filter); |
|     Weight normalizedWeight = CreateNormalizedWeight(filtered); |
|     return Search(normalizedWeight, after, n); |
| } |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/>. Forwards to |
| /// <see cref="Search(Query,Filter,int)"/> with a <c>null</c> filter. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs Search(Query query, int n) => Search(query, null, n); |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/>, applying <paramref name="filter"/> if non-null. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs Search(Query query, Filter filter, int n) |
| { |
|     Query filtered = WrapFilter(query, filter); |
|     Weight normalizedWeight = CreateNormalizedWeight(filtered); |
| |
|     // No "after" document: start from the first page. |
|     return Search(normalizedWeight, null, n); |
| } |
| |
| /// <summary> |
| /// Lower-level search API. |
| /// |
| /// <para/><see cref="ICollector.Collect(int)"/> is called for every matching |
| /// document. |
| /// </summary> |
| /// <param name="query"> To match documents </param> |
| /// <param name="filter"> If non-null, used to permit documents to be collected. </param> |
| /// <param name="results"> To receive hits </param> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual void Search(Query query, Filter filter, ICollector results) |
| { |
|     Query filtered = WrapFilter(query, filter); |
|     Weight normalizedWeight = CreateNormalizedWeight(filtered); |
| |
|     // Search every leaf of this searcher. |
|     Search(m_leafContexts, normalizedWeight, results); |
| } |
| |
| /// <summary> |
| /// Lower-level search API. |
| /// |
| /// <para/><see cref="ICollector.Collect(int)"/> is called for every matching document. |
| /// </summary> |
| /// <param name="query"> To match documents </param> |
| /// <param name="results"> To receive hits </param> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual void Search(Query query, ICollector results) |
| { |
|     Weight normalizedWeight = CreateNormalizedWeight(query); |
| |
|     // Search every leaf of this searcher. |
|     Search(m_leafContexts, normalizedWeight, results); |
| } |
| |
| /// <summary> |
| /// Search implementation with arbitrary sorting. Finds |
| /// the top <paramref name="n"/> hits for <paramref name="query"/>, applying |
| /// <paramref name="filter"/> if non-null, and sorting the hits by the criteria in |
| /// <paramref name="sort"/>. |
| /// |
| /// <para/>NOTE: this overload passes <c>false</c> for both score options; use |
| /// <see cref="IndexSearcher.Search(Query,Filter,int,Sort,bool,bool)"/> to |
| /// control scoring. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopFieldDocs Search(Query query, Filter filter, int n, Sort sort) |
| { |
|     Query filtered = WrapFilter(query, filter); |
|     Weight normalizedWeight = CreateNormalizedWeight(filtered); |
|     return Search(normalizedWeight, n, sort, false, false); |
| } |
| |
| /// <summary> |
| /// Search implementation with arbitrary sorting, plus |
| /// control over whether hit scores and max score |
| /// should be computed. Finds |
| /// the top <paramref name="n"/> hits for <paramref name="query"/>, applying |
| /// <paramref name="filter"/> if non-null, and sorting the hits by the criteria in |
| /// <paramref name="sort"/>. If <paramref name="doDocScores"/> is <c>true</c> |
| /// then the score of each hit will be computed and |
| /// returned. If <paramref name="doMaxScore"/> is |
| /// <c>true</c> then the maximum score over all |
| /// collected hits will be computed. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopFieldDocs Search(Query query, Filter filter, int n, Sort sort, bool doDocScores, bool doMaxScore) |
| { |
|     Query filtered = WrapFilter(query, filter); |
|     Weight normalizedWeight = CreateNormalizedWeight(filtered); |
|     return Search(normalizedWeight, n, sort, doDocScores, doMaxScore); |
| } |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/>, applying <paramref name="filter"/> if non-null, |
| /// where all results are after a previous result (<paramref name="after"/>), |
| /// sorted by <paramref name="sort"/>. |
| /// <para/> |
| /// By passing the bottom result from a previous page as <paramref name="after"/>, |
| /// this method can be used for efficient 'deep-paging' across potentially |
| /// large result sets. |
| /// </summary> |
| /// <exception cref="ArgumentException"> If <paramref name="after"/> is non-null |
| /// but is not a <see cref="FieldDoc"/>. </exception> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs SearchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort) |
| { |
|     // TODO: if we fix type safety of TopFieldDocs we can |
|     // remove this check |
|     FieldDoc afterFieldDoc = after as FieldDoc; |
|     if (afterFieldDoc == null && after != null) |
|     { |
|         throw new ArgumentException("after must be a FieldDoc; got " + after); |
|     } |
| |
|     Weight normalizedWeight = CreateNormalizedWeight(WrapFilter(query, filter)); |
|     return Search(normalizedWeight, afterFieldDoc, n, sort, true, false, false); |
| } |
| |
| /// <summary> |
| /// Search implementation with arbitrary sorting and no filter. |
| /// Both score options are passed as <c>false</c>. |
| /// </summary> |
| /// <param name="query"> The query to search for </param> |
| /// <param name="n"> Return only the top n results </param> |
| /// <param name="sort"> The <see cref="Lucene.Net.Search.Sort"/> object </param> |
| /// <returns> The top docs, sorted according to the supplied <see cref="Lucene.Net.Search.Sort"/> instance </returns> |
| /// <exception cref="IOException"> if there is a low-level I/O error </exception> |
| public virtual TopFieldDocs Search(Query query, int n, Sort sort) |
| { |
|     Weight normalizedWeight = CreateNormalizedWeight(query); |
|     return Search(normalizedWeight, n, sort, false, false); |
| } |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/> where all results are after a previous |
| /// result (<paramref name="after"/>), sorted by <paramref name="sort"/>. |
| /// <para/> |
| /// By passing the bottom result from a previous page as <paramref name="after"/>, |
| /// this method can be used for efficient 'deep-paging' across potentially |
| /// large result sets. |
| /// </summary> |
| /// <exception cref="ArgumentException"> If <paramref name="after"/> is non-null |
| /// but is not a <see cref="FieldDoc"/>. </exception> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs SearchAfter(ScoreDoc after, Query query, int n, Sort sort) |
| { |
|     // TODO: if we fix type safety of TopFieldDocs we can |
|     // remove this check |
|     FieldDoc afterFieldDoc = after as FieldDoc; |
|     if (afterFieldDoc == null && after != null) |
|     { |
|         throw new ArgumentException("after must be a FieldDoc; got " + after); |
|     } |
| |
|     Weight normalizedWeight = CreateNormalizedWeight(query); |
|     return Search(normalizedWeight, afterFieldDoc, n, sort, true, false, false); |
| } |
| |
| /// <summary> |
| /// Finds the top <paramref name="n"/> |
| /// hits for <paramref name="query"/>, applying <paramref name="filter"/> if non-null, |
| /// where all results are after a previous |
| /// result (<paramref name="after"/>), allowing control over |
| /// whether hit scores and max score should be computed. |
| /// <para/> |
| /// By passing the bottom result from a previous page as <paramref name="after"/>, |
| /// this method can be used for efficient 'deep-paging' across potentially |
| /// large result sets. If <paramref name="doDocScores"/> is <c>true</c> |
| /// then the score of each hit will be computed and |
| /// returned. If <paramref name="doMaxScore"/> is |
| /// <c>true</c> then the maximum score over all |
| /// collected hits will be computed. |
| /// </summary> |
| /// <exception cref="ArgumentException"> If <paramref name="after"/> is non-null |
| /// but is not a <see cref="FieldDoc"/>. </exception> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual TopDocs SearchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort, bool doDocScores, bool doMaxScore) |
| { |
|     // TODO: if we fix type safety of TopFieldDocs we can |
|     // remove this check |
|     FieldDoc afterFieldDoc = after as FieldDoc; |
|     if (afterFieldDoc == null && after != null) |
|     { |
|         throw new ArgumentException("after must be a FieldDoc; got " + after); |
|     } |
| |
|     Weight normalizedWeight = CreateNormalizedWeight(WrapFilter(query, filter)); |
|     return Search(normalizedWeight, afterFieldDoc, n, sort, true, doDocScores, doMaxScore); |
| } |
| |
| /// <summary> |
| /// Expert: Low-level search implementation. Finds the top <paramref name="nDocs"/> |
| /// hits for the given <paramref name="weight"/>, optionally restricted to hits |
| /// after <paramref name="after"/>. |
| /// <para/> |
| /// Without a <see cref="TaskScheduler"/> all leaves are searched by a single call; |
| /// with one, a task is submitted per leaf slice and the per-slice hits are merged |
| /// through a shared <see cref="HitQueue"/>. |
| /// |
| /// <para/>Applications should usually call <see cref="IndexSearcher.Search(Query,int)"/> or |
| /// <see cref="IndexSearcher.Search(Query,Filter,int)"/> instead. </summary> |
| /// <exception cref="ArgumentException"> If <c>after.Doc</c> is not below the reader's |
| /// <c>MaxDoc</c> (treated as 1 when the reader is empty). </exception> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| protected virtual TopDocs Search(Weight weight, ScoreDoc after, int nDocs) |
| { |
|     // An empty reader still gets a limit of 1. |
|     int maxDoc = reader.MaxDoc; |
|     int limit = maxDoc == 0 ? 1 : maxDoc; |
| |
|     if (after != null && after.Doc >= limit) |
|     { |
|         throw new ArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + after.Doc + " limit=" + limit); |
|     } |
|     nDocs = Math.Min(nDocs, limit); |
| |
|     if (executor == null) |
|     { |
|         return Search(m_leafContexts, weight, after, nDocs); |
|     } |
| |
|     var mergedQueue = new HitQueue(nDocs, false); |
|     var mergeLock = new ReentrantLock(); |
|     var runner = new ExecutionHelper<TopDocs>(executor); |
| |
|     // Submit one task per leaf slice. |
|     foreach (LeafSlice slice in m_leafSlices) |
|     { |
|         runner.Submit(new SearcherCallableNoSort(mergeLock, this, slice, weight, after, nDocs, mergedQueue)); |
|     } |
| |
|     // Fold the per-slice totals; slices without hits contribute nothing. |
|     int totalHits = 0; |
|     float maxScore = float.NegativeInfinity; |
|     foreach (TopDocs sliceDocs in runner) |
|     { |
|         if (sliceDocs.TotalHits == 0) |
|         { |
|             continue; |
|         } |
|         totalHits += sliceDocs.TotalHits; |
|         maxScore = Math.Max(maxScore, sliceDocs.MaxScore); |
|     } |
| |
|     // Drain the queue, filling the array from the last slot to the first. |
|     var merged = new ScoreDoc[mergedQueue.Count]; |
|     int slot = merged.Length; |
|     while (slot > 0) |
|     { |
|         slot--; |
|         merged[slot] = mergedQueue.Pop(); |
|     } |
| |
|     return new TopDocs(totalHits, merged, maxScore); |
| } |
| |
| /// <summary> |
| /// Expert: Low-level search implementation. Finds the top <paramref name="nDocs"/> |
| /// hits for <paramref name="weight"/> over the given <paramref name="leaves"/>, |
| /// on the calling thread. |
| /// |
| /// <para/>Applications should usually call <see cref="IndexSearcher.Search(Query,int)"/> or |
| /// <see cref="IndexSearcher.Search(Query,Filter,int)"/> instead. </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| protected virtual TopDocs Search(IList<AtomicReaderContext> leaves, Weight weight, ScoreDoc after, int nDocs) |
| { |
|     // single thread |
|     // Cap the request at the reader's MaxDoc; an empty reader still gets a cap of 1. |
|     int maxDoc = reader.MaxDoc; |
|     int cappedDocs = Math.Min(nDocs, maxDoc == 0 ? 1 : maxDoc); |
| |
|     bool docsInOrder = !weight.ScoresDocsOutOfOrder; |
|     TopScoreDocCollector topCollector = TopScoreDocCollector.Create(cappedDocs, after, docsInOrder); |
| |
|     Search(leaves, weight, topCollector); |
|     return topCollector.GetTopDocs(); |
| } |
| |
| /// <summary> |
| /// Expert: Low-level search implementation with arbitrary |
| /// sorting and control over whether hit scores and max |
| /// score should be computed. Finds |
| /// the top <paramref name="nDocs"/> hits for <paramref name="weight"/>, sorting the hits |
| /// by the criteria in <paramref name="sort"/>. Forwards to the seven-argument overload |
| /// with no "after" document and <c>fillFields</c> set to <c>true</c>. |
| /// |
| /// <para/>Applications should usually call |
| /// <see cref="IndexSearcher.Search(Query,Filter,int,Sort)"/> instead. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| protected virtual TopFieldDocs Search(Weight weight, int nDocs, Sort sort, bool doDocScores, bool doMaxScore) |
|     => Search(weight, null, nDocs, sort, fillFields: true, doDocScores: doDocScores, doMaxScore: doMaxScore); |
| |
| /// <summary> |
| /// Just like <see cref="Search(Weight, int, Sort, bool, bool)"/>, but you choose |
| /// whether or not the fields in the returned <see cref="FieldDoc"/> instances should |
| /// be set by specifying <paramref name="fillFields"/>. |
| /// <para/> |
| /// Without a <see cref="TaskScheduler"/> all leaves are searched by a single call; |
| /// with one, a task is submitted per leaf slice and each task feeds its hits into |
| /// one shared <see cref="TopFieldCollector"/>. |
| /// </summary> |
| /// <exception cref="ArgumentNullException"> If <paramref name="sort"/> is <c>null</c>. </exception> |
| protected virtual TopFieldDocs Search(Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore) |
| { |
|     if (sort == null) |
|     { |
|         // Use the (paramName, message) overload; the single-string overload |
|         // would report the message text as the parameter name. |
|         throw new ArgumentNullException(nameof(sort), "Sort must not be null"); |
|     } |
| |
|     int limit = reader.MaxDoc; |
|     if (limit == 0) |
|     { |
|         // An empty reader still gets a limit of 1. |
|         limit = 1; |
|     } |
|     nDocs = Math.Min(nDocs, limit); |
| |
|     if (executor == null) |
|     { |
|         // use all leaves here! |
|         return Search(m_leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore); |
|     } |
| |
|     TopFieldCollector topCollector = TopFieldCollector.Create(sort, nDocs, after, fillFields, doDocScores, doMaxScore, false); |
| |
|     ReentrantLock @lock = new ReentrantLock(); |
|     ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(executor); |
|     for (int i = 0; i < m_leafSlices.Length; i++) // search each leaf slice |
|     { |
|         runner.Submit(new SearcherCallableWithSort(@lock, this, m_leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore)); |
|     } |
| |
|     // Only the hit count is accumulated here. The returned max score is read from |
|     // the shared collector, which each task updates under the lock, so the |
|     // per-result max-score accumulator the original kept was never used. |
|     int totalHits = 0; |
|     foreach (TopFieldDocs topFieldDocs in runner) |
|     { |
|         totalHits += topFieldDocs.TotalHits; |
|     } |
| |
|     TopFieldDocs topDocs = (TopFieldDocs)topCollector.GetTopDocs(); |
| |
|     return new TopFieldDocs(totalHits, topDocs.ScoreDocs, topDocs.Fields, topDocs.MaxScore); |
| } |
| |
| /// <summary> |
| /// Just like <see cref="Search(Weight, int, Sort, bool, bool)"/>, but you choose |
| /// whether or not the fields in the returned <see cref="FieldDoc"/> instances should |
| /// be set by specifying <paramref name="fillFields"/>. This overload searches only |
| /// the given <paramref name="leaves"/>, on the calling thread. |
| /// </summary> |
| protected virtual TopFieldDocs Search(IList<AtomicReaderContext> leaves, Weight weight, FieldDoc after, int nDocs, Sort sort, bool fillFields, bool doDocScores, bool doMaxScore) |
| { |
|     // single thread |
|     // Cap the request at the reader's MaxDoc; an empty reader still gets a cap of 1. |
|     int maxDoc = reader.MaxDoc; |
|     int cappedDocs = Math.Min(nDocs, maxDoc == 0 ? 1 : maxDoc); |
| |
|     bool docsInOrder = !weight.ScoresDocsOutOfOrder; |
|     TopFieldCollector fieldCollector = TopFieldCollector.Create(sort, cappedDocs, after, fillFields, doDocScores, doMaxScore, docsInOrder); |
| |
|     Search(leaves, weight, fieldCollector); |
|     return (TopFieldDocs)fieldCollector.GetTopDocs(); |
| } |
| |
| /// <summary> |
| /// Lower-level search API. |
| /// |
| /// <para/> |
| /// The <paramref name="collector"/> is handed to the bulk scorer of each leaf, |
| /// which drives <see cref="ICollector.Collect(int)"/>. |
| /// |
| /// <para/> |
| /// NOTE: this method executes the searches on all given leaves exclusively. |
| /// To search across all the searchers leaves use <see cref="m_leafContexts"/>. |
| /// </summary> |
| /// <param name="leaves"> |
| /// The searchers leaves to execute the searches on </param> |
| /// <param name="weight"> |
| /// To match documents </param> |
| /// <param name="collector"> |
| /// To receive hits </param> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| protected virtual void Search(IList<AtomicReaderContext> leaves, Weight weight, ICollector collector) |
| { |
|     // TODO: should we make this |
|     // threaded...? the Collector could be sync'd? |
|     // always use single thread: |
|     foreach (AtomicReaderContext leaf in leaves) |
|     { |
|         try |
|         { |
|             collector.SetNextReader(leaf); |
|         } |
|         catch (CollectionTerminatedException) |
|         { |
|             // The collector has no interest in this leaf; move on to the next one. |
|             continue; |
|         } |
| |
|         BulkScorer bulkScorer = weight.GetBulkScorer(leaf, !collector.AcceptsDocsOutOfOrder, leaf.AtomicReader.LiveDocs); |
|         if (bulkScorer == null) |
|         { |
|             // No scorer for this leaf; nothing to score. |
|             continue; |
|         } |
| |
|         try |
|         { |
|             bulkScorer.Score(collector); |
|         } |
|         catch (CollectionTerminatedException) |
|         { |
|             // Collection ended early for this leaf; deliberately ignored so the |
|             // remaining leaves are still searched. |
|         } |
|     } |
| } |
| |
| /// <summary> |
| /// Expert: called to re-write queries into primitive queries. |
| /// Calls <c>Rewrite</c> on the query repeatedly until a call returns |
| /// the same instance it was invoked on, and returns that instance. |
| /// </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| public virtual Query Rewrite(Query original) |
| { |
|     Query current = original; |
|     Query next = current.Rewrite(reader); |
| |
|     while (next != current) |
|     { |
|         current = next; |
|         next = current.Rewrite(reader); |
|     } |
| |
|     return current; |
| } |
| |
| /// <summary> |
| /// Returns an <see cref="Explanation"/> that describes how <paramref name="doc"/> scored against |
| /// <paramref name="query"/>. |
| /// |
| /// <para/>This is intended to be used in developing <see cref="Similarities.Similarity"/> implementations, |
| /// and, for good performance, should not be displayed with every hit. |
| /// Computing an explanation is as expensive as executing the query over the |
| /// entire index. |
| /// </summary> |
| public virtual Explanation Explain(Query query, int doc) |
| { |
|     Weight normalizedWeight = CreateNormalizedWeight(query); |
|     return Explain(normalizedWeight, doc); |
| } |
| |
| /// <summary> |
| /// Expert: low-level implementation method. |
| /// Returns an <see cref="Explanation"/> that describes how <paramref name="doc"/> scored against |
| /// <paramref name="weight"/>. |
| /// |
| /// <para/>This is intended to be used in developing <see cref="Similarities.Similarity"/> implementations, |
| /// and, for good performance, should not be displayed with every hit. |
| /// Computing an explanation is as expensive as executing the query over the |
| /// entire index. |
| /// <para/>Applications should call <see cref="IndexSearcher.Explain(Query, int)"/>. </summary> |
| /// <exception cref="BooleanQuery.TooManyClausesException"> If a query would exceed |
| /// <see cref="BooleanQuery.MaxClauseCount"/> clauses. </exception> |
| protected virtual Explanation Explain(Weight weight, int doc) |
| { |
|     // Locate the leaf containing the document, then rebase the id onto that leaf. |
|     int leafIndex = ReaderUtil.SubIndex(doc, m_leafContexts); |
|     AtomicReaderContext leaf = m_leafContexts[leafIndex]; |
| |
|     return weight.Explain(leaf, doc - leaf.DocBase); |
| } |
| |
| /// <summary> |
| /// Creates a normalized weight for a top-level <see cref="Query"/>. |
| /// The query is rewritten by this method and <see cref="Query.CreateWeight(IndexSearcher)"/> called, |
| /// afterwards the <see cref="Weight"/> is normalized. The returned <see cref="Weight"/> |
| /// can then directly be used to get a <see cref="Scorer"/>. |
| /// <para/> |
| /// If the query norm computed by the current <see cref="Similarity"/> is infinite or NaN, |
| /// a norm of <c>1.0</c> is used instead. |
| /// <para/> |
| /// @lucene.internal |
| /// </summary> |
| public virtual Weight CreateNormalizedWeight(Query query) |
| { |
|     Query rewritten = Rewrite(query); |
|     Weight weight = rewritten.CreateWeight(this); |
| |
|     float rawValue = weight.GetValueForNormalization(); |
|     float norm = Similarity.QueryNorm(rawValue); |
| |
|     // Fall back to a neutral norm when the computed one is not a finite number. |
|     bool normUnusable = float.IsInfinity(norm) || float.IsNaN(norm); |
|     weight.Normalize(normUnusable ? 1.0f : norm, 1.0f); |
| |
|     return weight; |
| } |
| |
| /// <summary> |
| /// Returns this searcher's top-level <see cref="IndexReaderContext"/>, |
| /// i.e. the context it was constructed with. |
| /// </summary> |
| /// <seealso cref="IndexReader.Context"/> |
| public virtual IndexReaderContext TopReaderContext |
| { |
|     get { return m_readerContext; } |
| } |
| |
| /// <summary> |
| /// A callable that searches one <see cref="LeafSlice"/> and merges its hits |
| /// into a <see cref="HitQueue"/> shared with the other slices. |
| /// </summary> |
| private sealed class SearcherCallableNoSort : ICallable<TopDocs> |
| { |
|     // Shared across all callables of one search. |
|     private readonly ReentrantLock @lock; |
|     private readonly HitQueue hq; |
| |
|     // Per-search inputs. |
|     private readonly IndexSearcher searcher; |
|     private readonly LeafSlice slice; |
|     private readonly Weight weight; |
|     private readonly ScoreDoc after; |
|     private readonly int nDocs; |
| |
|     public SearcherCallableNoSort(ReentrantLock @lock, IndexSearcher searcher, LeafSlice slice, Weight weight, ScoreDoc after, int nDocs, HitQueue hq) |
|     { |
|         this.@lock = @lock; |
|         this.hq = hq; |
|         this.searcher = searcher; |
|         this.slice = slice; |
|         this.weight = weight; |
|         this.after = after; |
|         this.nDocs = nDocs; |
|     } |
| |
|     /// <summary> |
|     /// Searches this callable's slice, then offers each hit to the shared queue |
|     /// while holding the lock. Stops offering as soon as the queue hands a hit |
|     /// straight back. |
|     /// </summary> |
|     /// <returns> The slice's own search result. </returns> |
|     public TopDocs Call() |
|     { |
|         TopDocs sliceDocs = searcher.Search(slice.Leaves, weight, after, nDocs); |
|         ScoreDoc[] sliceHits = sliceDocs.ScoreDocs; |
| |
|         // The queue offers no thread-safe insert, so the merge is serialized. |
|         @lock.Lock(); |
|         try |
|         { |
|             foreach (ScoreDoc candidate in sliceHits) |
|             { |
|                 // Getting the same instance back means it was not kept. |
|                 if (hq.InsertWithOverflow(candidate) == candidate) |
|                 { |
|                     break; |
|                 } |
|             } |
|         } |
|         finally |
|         { |
|             @lock.Unlock(); |
|         } |
| |
|         return sliceDocs; |
|     } |
| } |
| |
| /// <summary> |
| /// A callable that runs a sorted search over one <see cref="LeafSlice"/> and replays |
| /// its hits into a <see cref="TopFieldCollector"/> shared with the other slices. |
| /// </summary> |
| private sealed class SearcherCallableWithSort : ICallable<TopFieldDocs> |
| { |
|     // Shared across all callables of one search. |
|     private readonly ReentrantLock @lock; |
|     private readonly TopFieldCollector sharedCollector; |
| |
|     // Per-search inputs. |
|     private readonly IndexSearcher searcher; |
|     private readonly LeafSlice slice; |
|     private readonly Weight weight; |
|     private readonly FieldDoc after; |
|     private readonly int nDocs; |
|     private readonly Sort sort; |
|     private readonly bool doDocScores; |
|     private readonly bool doMaxScore; |
| |
|     // Stand-in scorer through which each replayed hit's doc and score are exposed to the shared collector. |
|     private readonly FakeScorer fakeScorer = new FakeScorer(); |
| |
|     public SearcherCallableWithSort(ReentrantLock @lock, IndexSearcher searcher, LeafSlice slice, Weight weight, FieldDoc after, int nDocs, TopFieldCollector hq, Sort sort, bool doDocScores, bool doMaxScore) |
|     { |
|         this.@lock = @lock; |
|         this.sharedCollector = hq; |
|         this.searcher = searcher; |
|         this.slice = slice; |
|         this.weight = weight; |
|         this.after = after; |
|         this.nDocs = nDocs; |
|         this.sort = sort; |
|         this.doDocScores = doDocScores; |
|         this.doMaxScore = doMaxScore; |
|     } |
| |
|     /// <summary> |
|     /// Searches this callable's slice (asserted to hold exactly one leaf), then, while |
|     /// holding the lock, feeds every hit into the shared collector using leaf-relative |
|     /// document ids. |
|     /// </summary> |
|     /// <returns> The slice's own search result. </returns> |
|     public TopFieldDocs Call() |
|     { |
|         if (Debugging.AssertsEnabled) Debugging.Assert(slice.Leaves.Length == 1); |
|         TopFieldDocs sliceDocs = searcher.Search(slice.Leaves, weight, after, nDocs, sort, true, doDocScores || sort.NeedsScores, doMaxScore); |
| |
|         @lock.Lock(); |
|         try |
|         { |
|             AtomicReaderContext leaf = slice.Leaves[0]; |
|             int docBase = leaf.DocBase; |
|             sharedCollector.SetNextReader(leaf); |
|             sharedCollector.SetScorer(fakeScorer); |
| |
|             ScoreDoc[] sliceHits = sliceDocs.ScoreDocs; |
|             for (int i = 0; i < sliceHits.Length; i++) |
|             { |
|                 ScoreDoc hit = sliceHits[i]; |
|                 int leafDoc = hit.Doc - docBase; |
| |
|                 fakeScorer.doc = leafDoc; |
|                 fakeScorer.score = hit.Score; |
|                 sharedCollector.Collect(leafDoc); |
|             } |
| |
|             // Carry over maxScore from sub: |
|             if (doMaxScore && sliceDocs.MaxScore > sharedCollector.maxScore) |
|             { |
|                 sharedCollector.maxScore = sliceDocs.MaxScore; |
|             } |
|         } |
|         finally |
|         { |
|             @lock.Unlock(); |
|         } |
| |
|         return sliceDocs; |
|     } |
| } |
| |
/// <summary>
/// Wraps an <see cref="ICompletionService{T}"/> and exposes the results of the
/// submitted <see cref="ICallable{V}"/> instances as an enumerable sequence.
/// The instance is its own enumerator, so it can only be walked once.
/// </summary>
/// <typeparam name="T">the type of the <see cref="ICallable{V}"/> return value</typeparam>
private sealed class ExecutionHelper<T> : IEnumerator<T>, IEnumerable<T>
{
    private readonly ICompletionService<T> service;

    // Number of submitted tasks whose result has not been taken yet.
    private int numTasks;

    // Result fetched by the most recent successful MoveNext().
    private T current;

    /// <summary>
    /// Creates a helper whose tasks are run through a
    /// <see cref="TaskSchedulerCompletionService{T}"/> built on <paramref name="executor"/>.
    /// </summary>
    internal ExecutionHelper(TaskScheduler executor)
    {
        service = new TaskSchedulerCompletionService<T>(executor);
    }

    /// <summary>
    /// Hands <paramref name="task"/> to the completion service and counts it as outstanding.
    /// </summary>
    public void Submit(ICallable<T> task)
    {
        service.Submit(task);
        numTasks++;
    }

    public T Current => current;

    object IEnumerator.Current => current;

    /// <summary>
    /// Takes the next completed task from the service, waits for it and stores its result.
    /// </summary>
    /// <returns><c>true</c> if a result was fetched; <c>false</c> once no tasks are outstanding.</returns>
    public bool MoveNext()
    {
        if (numTasks <= 0)
        {
            return false;
        }

        try
        {
            var completed = service.Take();
            completed.Wait();
            current = completed.Result;
            return true;
        }
#if FEATURE_THREAD_INTERRUPT
        catch (ThreadInterruptedException)
        {
            // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
            throw;
        }
#endif
        catch (Exception e)
        {
            // LUCENENET NOTE: We need to re-throw this as Exception to
            // ensure it is not caught in the wrong place
            throw new Exception(e.ToString(), e);
        }
        finally
        {
            // The task counts as consumed whether it succeeded or failed.
            numTasks--;
        }
    }

    /// <summary>
    /// Not supported; always throws <see cref="NotSupportedException"/>.
    /// </summary>
    public void Reset()
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Nothing to release.
    /// </summary>
    public void Dispose()
    {
    }

    // LUCENENET NOTE: the Java iterator's Remove() is not supported in .NET anyway,
    // so no counterpart is provided.

    // Shortcut: this type is only used in a private context, so the helper
    // serves as its own enumerator.
    public IEnumerator<T> GetEnumerator() => this;

    IEnumerator IEnumerable.GetEnumerator() => this;
}
| |
/// <summary>
/// Holds a subset of the <see cref="IndexSearcher"/>'s leaf contexts that is
/// meant to be executed within a single thread.
/// <para/>
/// @lucene.experimental
/// </summary>
public class LeafSlice
{
    /// <summary>
    /// Creates a slice over the given leaf contexts; the array is stored as passed.
    /// </summary>
    public LeafSlice(params AtomicReaderContext[] leaves)
    {
        Leaves = leaves;
    }

    /// <summary>
    /// The leaf contexts supplied at construction.
    /// </summary>
    internal AtomicReaderContext[] Leaves { get; }
}
| |
/// <summary>
/// Returns a description of this searcher naming its reader and its executor.
/// </summary>
public override string ToString()
    => string.Concat("IndexSearcher(", reader, "; executor=", executor, ")");
| |
/// <summary>
/// Builds the <see cref="Search.TermStatistics"/> for <paramref name="term"/> from
/// the document frequency and total term frequency held by <paramref name="context"/>.
/// <para/>
/// Subclasses may override this, for example to report a term's statistics
/// across a distributed collection.
/// <para/>
/// @lucene.experimental
/// </summary>
public virtual TermStatistics TermStatistics(Term term, TermContext context)
{
    var termBytes = term.Bytes;
    var docFreq = context.DocFreq;
    var totalTermFreq = context.TotalTermFreq;

    return new TermStatistics(termBytes, docFreq, totalTermFreq);
}
| |
/// <summary>
/// Builds the <see cref="Search.CollectionStatistics"/> for <paramref name="field"/>
/// from the reader's terms for that field; every count is zero when the reader
/// has no terms for the field.
/// <para/>
/// Subclasses may override this, for example to report a field's statistics
/// across a distributed collection.
/// <para/>
/// @lucene.experimental
/// </summary>
public virtual CollectionStatistics CollectionStatistics(string field)
{
    if (Debugging.AssertsEnabled) Debugging.Assert(field != null);

    Terms fieldTerms = MultiFields.GetTerms(reader, field);
    bool hasTerms = fieldTerms != null;

    // Statistics are read before reader.MaxDoc, and fall back to zero without terms.
    int docCount = hasTerms ? fieldTerms.DocCount : 0;
    long sumTotalTermFreq = hasTerms ? fieldTerms.SumTotalTermFreq : 0L;
    long sumDocFreq = hasTerms ? fieldTerms.SumDocFreq : 0L;

    return new CollectionStatistics(field, reader.MaxDoc, docCount, sumTotalTermFreq, sumDocFreq);
}
| } |
| } |