// src/Lucene.Net/Search/Collector.cs - lucenenet - Git at Google

 using System;

 namespace Lucene.Net.Search
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;

     /// <summary>
     /// <para>Expert: Collectors are primarily meant to be used to
     /// gather raw results from a search, and implement sorting
     /// or custom result filtering, collation, etc. </para>
     ///
     /// <para>Lucene's core collectors implement <see cref="ICollector"/>.
     /// Likely your application can use one of these classes, or
     /// subclass <see cref="TopDocsCollector{T}"/>, instead of
     /// implementing <see cref="ICollector"/> directly:
     ///
     /// <list type="bullet">
     ///
     ///   <item><description><see cref="TopDocsCollector{T}"/> is an abstract base class
     ///   that assumes you will retrieve the top N docs,
     ///   according to some criteria, after collection is
     ///   done.  </description></item>
     ///
     ///   <item><description><see cref="TopScoreDocCollector"/> is a concrete subclass
     ///   of <see cref="TopDocsCollector{T}"/> and sorts according to score +
     ///   docID.  This is used internally by the
     ///   <see cref="IndexSearcher"/> search methods that do not take an
     ///   explicit <see cref="Sort"/>. It is likely the most frequently
     ///   used collector.</description></item>
     ///
     ///   <item><description><see cref="TopFieldCollector"/> subclasses
     ///   <see cref="TopDocsCollector{T}"/> and sorts according to a specified
     ///   <see cref="Sort"/> object (sort by field).  This is used
     ///   internally by the <see cref="IndexSearcher"/> search methods
     ///   that take an explicit <see cref="Sort"/>.</description></item>
     ///
     ///   <item><description><see cref="TimeLimitingCollector"/>, which wraps any other
     ///   Collector and aborts the search if it's taken too much
     ///   time.</description></item>
     ///
     ///   <item><description><see cref="PositiveScoresOnlyCollector"/> wraps any other
     ///   <see cref="ICollector"/> and prevents collection of hits whose score
     ///   is &lt;= 0.0</description></item>
     ///
     /// </list>
     /// </para>
     ///
     /// <para><see cref="ICollector"/> decouples the score from the collected doc:
     /// the score computation is skipped entirely if it's not
     /// needed.  Collectors that do need the score should
     /// implement the <see cref="SetScorer(Scorer)"/> method, to hold onto the
     /// passed <see cref="Scorer"/> instance, and call
     /// <see cref="Scorer.GetScore()"/> within the collect method to compute the
     /// current hit's score.  If your collector may request the
     /// score for a single hit multiple times, you should use
     /// <see cref="ScoreCachingWrappingScorer"/>. </para>
     ///
     /// <para><b>NOTE:</b> The doc that is passed to the collect
     /// method is relative to the current reader. If your
     /// collector needs to resolve this to the docID space of the
     /// Multi*Reader, you must re-base it by recording the
     /// docBase from the most recent <see cref="SetNextReader(AtomicReaderContext)"/> call.  Here's
     /// a simple example showing how to collect docIDs into an
     /// <see cref="Util.OpenBitSet"/>:</para>
     ///
     /// <code>
     /// private class MySearchCollector : ICollector
     /// {
     ///     private readonly OpenBitSet bits;
     ///     private int docBase;
     ///
     ///     public MySearchCollector(OpenBitSet bits)
     ///     {
     ///         if (bits == null) throw new ArgumentNullException("bits");
     ///         this.bits = bits;
     ///     }
     ///
     ///     // ignore scorer
     ///     public void SetScorer(Scorer scorer)
     ///     {
     ///     }
     ///
     ///     // accept docs out of order (for a BitSet it doesn't matter)
     ///     public bool AcceptsDocsOutOfOrder
     ///     {
     ///         get { return true; }
     ///     }
     ///
     ///     public void Collect(int doc)
     ///     {
     ///         bits.Set(doc + docBase);
     ///     }
     ///
     ///     public void SetNextReader(AtomicReaderContext context)
     ///     {
     ///         this.docBase = context.DocBase;
     ///     }
     /// }
     ///
     /// IndexSearcher searcher = new IndexSearcher(indexReader);
     /// OpenBitSet bits = new OpenBitSet(indexReader.MaxDoc);
     /// searcher.Search(query, new MySearchCollector(bits));
     /// </code>
     ///
     /// <para>Not all collectors will need to rebase the docID.  For
     /// example, a collector that simply counts the total number
     /// of hits would skip it.</para>
     ///
     /// <para><b>NOTE:</b> Prior to 2.9, Lucene silently filtered
     /// out hits with score &lt;= 0.  As of 2.9, the core <see cref="ICollector"/>s
     /// no longer do that.  It's very unusual to have such hits
     /// (a negative query boost, or function query returning
     /// negative custom scores, could cause it to happen).  If
     /// you need that behavior, use
     /// <see cref="PositiveScoresOnlyCollector"/>.</para>
     ///
     /// @lucene.experimental
     /// <para/>
     /// @since 2.9
     /// </summary>
     public interface ICollector // LUCENENET NOTE: This was an abstract class in Lucene, but made into an interface since we need one for Grouping's covariance
     {
         /// <summary>
         /// Called before successive calls to <see cref="Collect(int)"/>. Implementations
         /// that need the score of the current document (passed-in to
         /// <see cref="Collect(int)"/>), should save the passed-in <see cref="Scorer"/> and call
         /// <c>scorer.GetScore()</c> when needed.
         /// </summary>
         /// <param name="scorer">The <see cref="Scorer"/> to hold onto if scores are needed
         /// while collecting.</param>
         void SetScorer(Scorer scorer);

         /// <summary>
         /// Called once for every document matching a query, with the unbased document
         /// number.
         /// <para/>Note: The collection of the current segment can be terminated by throwing
         /// a <see cref="CollectionTerminatedException"/>. In this case, the last docs of the
         /// current <see cref="AtomicReaderContext"/> will be skipped and <see cref="IndexSearcher"/>
         /// will swallow the exception and continue collection with the next leaf.
         /// <para/>
         /// Note: this is called in an inner search loop. For good search performance,
         /// implementations of this method should not call <see cref="IndexSearcher.Doc(int)"/> or
         /// <see cref="Lucene.Net.Index.IndexReader.Document(int)"/> on every hit.
         /// Doing so can slow searches by an order of magnitude or more.
         /// </summary>
         /// <param name="doc">The unbased document number of the matching document, i.e. relative
         /// to the reader supplied by the most recent <see cref="SetNextReader(AtomicReaderContext)"/> call.</param>
         void Collect(int doc);

         /// <summary>
         /// Called before collecting from each <see cref="AtomicReaderContext"/>. All doc ids in
         /// <see cref="Collect(int)"/> will correspond to <see cref="Index.IndexReaderContext.Reader"/>.
         /// <para/>
         /// Add <see cref="AtomicReaderContext.DocBase"/> to the current <see cref="Index.IndexReaderContext.Reader"/>'s
         /// internal document id to re-base ids in <see cref="Collect(int)"/>.
         /// </summary>
         /// <param name="context">next atomic reader context </param>
         void SetNextReader(AtomicReaderContext context);

         /// <summary>
         /// Return <c>true</c> if this collector does not
         /// require the matching docIDs to be delivered in int sort
         /// order (smallest to largest) to <see cref="Collect(int)"/>.
         ///
         /// <para> Most Lucene Query implementations will visit
         /// matching docIDs in order.  However, some queries
         /// (currently limited to certain cases of <see cref="BooleanQuery"/>)
         /// can achieve faster searching if the
         /// <see cref="ICollector"/> allows them to deliver the
         /// docIDs out of order.</para>
         ///
         /// <para> Many collectors don't mind getting docIDs out of
         /// order, so it's important to return <c>true</c>
         /// here.</para>
         /// </summary>
         bool AcceptsDocsOutOfOrder { get; }
     }

     /// <summary>
     /// LUCENENET specific class used to hold the
     /// <see cref="NewAnonymous(Action{Scorer}, Action{int}, Action{AtomicReaderContext}, Func{bool})"/> static method.
     /// </summary>
     public static class Collector
     {
         /// <summary>
         /// Creates a new instance with the ability to specify the body of the <see cref="ICollector.SetScorer(Scorer)"/>
         /// method through the <paramref name="setScorer"/> parameter, the body of the <see cref="ICollector.Collect(int)"/>
         /// method through the <paramref name="collect"/> parameter, the body of the <see cref="ICollector.SetNextReader(AtomicReaderContext)"/>
         /// method through the <paramref name="setNextReader"/> parameter, and the body of the <see cref="ICollector.AcceptsDocsOutOfOrder"/>
         /// property through the <paramref name="acceptsDocsOutOfOrder"/> parameter.
         /// Simple example:
         /// <code>
         ///     IndexSearcher searcher = new IndexSearcher(indexReader);
         ///     OpenBitSet bits = new OpenBitSet(indexReader.MaxDoc);
         ///     int docBase = 0; // must be assigned before the lambdas below capture it
         ///     searcher.Search(query,
         ///         Collector.NewAnonymous(setScorer: (scorer) =>
         ///         {
         ///             // ignore scorer
         ///         }, collect: (doc) =>
         ///         {
         ///             bits.Set(doc + docBase);
         ///         }, setNextReader: (context) =>
         ///         {
         ///             docBase = context.DocBase;
         ///         }, acceptsDocsOutOfOrder: () =>
         ///         {
         ///             return true;
         ///         })
         ///     );
         /// </code>
         /// </summary>
         /// <param name="setScorer">
         /// A delegate method that represents (is called by) the <see cref="ICollector.SetScorer(Scorer)"/>
         /// method. It accepts a <see cref="Scorer"/> scorer and
         /// has no return value.
         /// </param>
         /// <param name="collect">
         /// A delegate method that represents (is called by) the <see cref="ICollector.Collect(int)"/>
         /// method. It accepts an <see cref="int"/> doc and
         /// has no return value.
         /// </param>
         /// <param name="setNextReader">
         /// A delegate method that represents (is called by) the <see cref="ICollector.SetNextReader(AtomicReaderContext)"/>
         /// method. It accepts a <see cref="AtomicReaderContext"/> context and
         /// has no return value.
         /// </param>
         /// <param name="acceptsDocsOutOfOrder">
         /// A delegate method that represents (is called by) the <see cref="ICollector.AcceptsDocsOutOfOrder"/>
         /// property. It returns a <see cref="bool"/> value.
         /// </param>
         /// <returns> A new <see cref="ICollector"/> instance that forwards each member to the matching delegate. </returns>
         /// <exception cref="ArgumentNullException">
         /// <paramref name="setScorer"/>, <paramref name="collect"/>, <paramref name="setNextReader"/>
         /// or <paramref name="acceptsDocsOutOfOrder"/> is <c>null</c>.
         /// </exception>
         public static ICollector NewAnonymous(Action<Scorer> setScorer, Action<int> collect, Action<AtomicReaderContext> setNextReader, Func<bool> acceptsDocsOutOfOrder)
         {
             return new AnonymousCollector(setScorer, collect, setNextReader, acceptsDocsOutOfOrder);
         }

         // LUCENENET specific
         /// <summary>
         /// <see cref="ICollector"/> implementation whose members simply invoke the delegates
         /// supplied at construction time.
         /// </summary>
         private sealed class AnonymousCollector : ICollector
         {
             private readonly Action<Scorer> setScorer;
             private readonly Action<int> collect;
             private readonly Action<AtomicReaderContext> setNextReader;
             private readonly Func<bool> acceptsDocsOutOfOrder;

             /// <summary>
             /// Stores the four delegates after verifying that none of them is <c>null</c>.
             /// </summary>
             /// <exception cref="ArgumentNullException">Any of the delegates is <c>null</c>.</exception>
             public AnonymousCollector(Action<Scorer> setScorer, Action<int> collect, Action<AtomicReaderContext> setNextReader, Func<bool> acceptsDocsOutOfOrder)
             {
                 // nameof keeps the reported parameter name in sync with the signature on rename.
                 if (setScorer == null)
                 {
                     throw new ArgumentNullException(nameof(setScorer));
                 }
                 if (collect == null)
                 {
                     throw new ArgumentNullException(nameof(collect));
                 }
                 if (setNextReader == null)
                 {
                     throw new ArgumentNullException(nameof(setNextReader));
                 }
                 if (acceptsDocsOutOfOrder == null)
                 {
                     throw new ArgumentNullException(nameof(acceptsDocsOutOfOrder));
                 }

                 this.setScorer = setScorer;
                 this.collect = collect;
                 this.setNextReader = setNextReader;
                 this.acceptsDocsOutOfOrder = acceptsDocsOutOfOrder;
             }

             /// <summary>Invokes the <c>acceptsDocsOutOfOrder</c> delegate on every read and returns its result.</summary>
             public bool AcceptsDocsOutOfOrder => this.acceptsDocsOutOfOrder();

             /// <summary>Forwards <paramref name="doc"/> to the <c>collect</c> delegate.</summary>
             public void Collect(int doc)
             {
                 this.collect(doc);
             }

             /// <summary>Forwards <paramref name="context"/> to the <c>setNextReader</c> delegate.</summary>
             public void SetNextReader(AtomicReaderContext context)
             {
                 this.setNextReader(context);
             }

             /// <summary>Forwards <paramref name="scorer"/> to the <c>setScorer</c> delegate.</summary>
             public void SetScorer(Scorer scorer)
             {
                 this.setScorer(scorer);
             }
         }
     }
 }
	using System;

	namespace Lucene.Net.Search
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;

	/// <summary>
	/// <para>Expert: Collectors are primarily meant to be used to
	/// gather raw results from a search, and implement sorting
	/// or custom result filtering, collation, etc. </para>
	///
	/// <para>Lucene's core collectors implement <see cref="ICollector"/>.
	/// Likely your application can use one of these classes, or
	/// subclass <see cref="TopDocsCollector{T}"/>, instead of
	/// implementing <see cref="ICollector"/> directly:
	///
	/// <list type="bullet">
	///
	/// <item><description><see cref="TopDocsCollector{T}"/> is an abstract base class
	/// that assumes you will retrieve the top N docs,
	/// according to some criteria, after collection is
	/// done. </description></item>
	///
	/// <item><description><see cref="TopScoreDocCollector"/> is a concrete subclass
	/// of <see cref="TopDocsCollector{T}"/> and sorts according to score +
	/// docID. This is used internally by the
	/// <see cref="IndexSearcher"/> search methods that do not take an
	/// explicit <see cref="Sort"/>. It is likely the most frequently
	/// used collector.</description></item>
	///
	/// <item><description><see cref="TopFieldCollector"/> subclasses
	/// <see cref="TopDocsCollector{T}"/> and sorts according to a specified
	/// <see cref="Sort"/> object (sort by field). This is used
	/// internally by the <see cref="IndexSearcher"/> search methods
	/// that take an explicit <see cref="Sort"/>.</description></item>
	///
	/// <item><description><see cref="TimeLimitingCollector"/>, which wraps any other
	/// Collector and aborts the search if it's taken too much
	/// time.</description></item>
	///
	/// <item><description><see cref="PositiveScoresOnlyCollector"/> wraps any other
	/// <see cref="ICollector"/> and prevents collection of hits whose score
	/// is &lt;= 0.0</description></item>
	///
	/// </list>
	/// </para>
	///
	/// <para><see cref="ICollector"/> decouples the score from the collected doc:
	/// the score computation is skipped entirely if it's not
	/// needed. Collectors that do need the score should
	/// implement the <see cref="SetScorer(Scorer)"/> method, to hold onto the
	/// passed <see cref="Scorer"/> instance, and call
	/// <see cref="Scorer.GetScore()"/> within the collect method to compute the
	/// current hit's score. If your collector may request the
	/// score for a single hit multiple times, you should use
	/// <see cref="ScoreCachingWrappingScorer"/>. </para>
	///
	/// <para><b>NOTE:</b> The doc that is passed to the collect
	/// method is relative to the current reader. If your
	/// collector needs to resolve this to the docID space of the
	/// Multi*Reader, you must re-base it by recording the
	/// docBase from the most recent <see cref="SetNextReader(AtomicReaderContext)"/> call. Here's
	/// a simple example showing how to collect docIDs into an
	/// <see cref="Util.OpenBitSet"/>:</para>
	///
	/// <code>
	/// private class MySearchCollector : ICollector
	/// {
	/// private readonly OpenBitSet bits;
	/// private int docBase;
	///
	/// public MySearchCollector(OpenBitSet bits)
	/// {
	/// if (bits == null) throw new ArgumentNullException("bits");
	/// this.bits = bits;
	/// }
	///
	/// // ignore scorer
	/// public void SetScorer(Scorer scorer)
	/// {
	/// }
	///
	/// // accept docs out of order (for a BitSet it doesn't matter)
	/// public bool AcceptsDocsOutOfOrder
	/// {
	/// get { return true; }
	/// }
	///
	/// public void Collect(int doc)
	/// {
	/// bits.Set(doc + docBase);
	/// }
	///
	/// public void SetNextReader(AtomicReaderContext context)
	/// {
	/// this.docBase = context.DocBase;
	/// }
	/// }
	///
	/// IndexSearcher searcher = new IndexSearcher(indexReader);
	/// OpenBitSet bits = new OpenBitSet(indexReader.MaxDoc);
	/// searcher.Search(query, new MySearchCollector(bits));
	/// </code>
	///
	/// <para>Not all collectors will need to rebase the docID. For
	/// example, a collector that simply counts the total number
	/// of hits would skip it.</para>
	///
	/// <para><b>NOTE:</b> Prior to 2.9, Lucene silently filtered
	/// out hits with score &lt;= 0. As of 2.9, the core <see cref="ICollector"/>s
	/// no longer do that. It's very unusual to have such hits
	/// (a negative query boost, or function query returning
	/// negative custom scores, could cause it to happen). If
	/// you need that behavior, use
	/// <see cref="PositiveScoresOnlyCollector"/>.</para>
	///
	/// @lucene.experimental
	/// <para/>
	/// @since 2.9
	/// </summary>
	public interface ICollector // LUCENENET NOTE: This was an abstract class in Lucene, but made into an interface since we need one for Grouping's covariance
	{
	/// <summary>
	/// Called before successive calls to <see cref="Collect(int)"/>. Implementations
	/// that need the score of the current document (passed-in to
	/// <see cref="Collect(int)"/>), should save the passed-in <see cref="Scorer"/> and call
	/// <c>scorer.GetScore()</c> when needed.
	/// </summary>
	/// <param name="scorer">The <see cref="Scorer"/> to hold onto if scores are needed
	/// while collecting.</param>
	void SetScorer(Scorer scorer);

	/// <summary>
	/// Called once for every document matching a query, with the unbased document
	/// number.
	/// <para/>Note: The collection of the current segment can be terminated by throwing
	/// a <see cref="CollectionTerminatedException"/>. In this case, the last docs of the
	/// current <see cref="AtomicReaderContext"/> will be skipped and <see cref="IndexSearcher"/>
	/// will swallow the exception and continue collection with the next leaf.
	/// <para/>
	/// Note: this is called in an inner search loop. For good search performance,
	/// implementations of this method should not call <see cref="IndexSearcher.Doc(int)"/> or
	/// <see cref="Lucene.Net.Index.IndexReader.Document(int)"/> on every hit.
	/// Doing so can slow searches by an order of magnitude or more.
	/// </summary>
	/// <param name="doc">The unbased document number of the matching document, i.e. relative
	/// to the reader supplied by the most recent <see cref="SetNextReader(AtomicReaderContext)"/> call.</param>
	void Collect(int doc);

	/// <summary>
	/// Called before collecting from each <see cref="AtomicReaderContext"/>. All doc ids in
	/// <see cref="Collect(int)"/> will correspond to <see cref="Index.IndexReaderContext.Reader"/>.
	/// <para/>
	/// Add <see cref="AtomicReaderContext.DocBase"/> to the current <see cref="Index.IndexReaderContext.Reader"/>'s
	/// internal document id to re-base ids in <see cref="Collect(int)"/>.
	/// </summary>
	/// <param name="context">next atomic reader context </param>
	void SetNextReader(AtomicReaderContext context);

	/// <summary>
	/// Return <c>true</c> if this collector does not
	/// require the matching docIDs to be delivered in int sort
	/// order (smallest to largest) to <see cref="Collect(int)"/>.
	///
	/// <para> Most Lucene Query implementations will visit
	/// matching docIDs in order. However, some queries
	/// (currently limited to certain cases of <see cref="BooleanQuery"/>)
	/// can achieve faster searching if the
	/// <see cref="ICollector"/> allows them to deliver the
	/// docIDs out of order.</para>
	///
	/// <para> Many collectors don't mind getting docIDs out of
	/// order, so it's important to return <c>true</c>
	/// here.</para>
	/// </summary>
	bool AcceptsDocsOutOfOrder { get; }
	}

	/// <summary>
	/// LUCENENET specific class used to hold the
	/// <see cref="NewAnonymous(Action{Scorer}, Action{int}, Action{AtomicReaderContext}, Func{bool})"/> static method.
	/// </summary>
	public static class Collector
	{
		/// <summary>
		/// Creates a new instance with the ability to specify the body of the <see cref="ICollector.SetScorer(Scorer)"/>
		/// method through the <paramref name="setScorer"/> parameter, the body of the <see cref="ICollector.Collect(int)"/>
		/// method through the <paramref name="collect"/> parameter, the body of the <see cref="ICollector.SetNextReader(AtomicReaderContext)"/>
		/// method through the <paramref name="setNextReader"/> parameter, and the body of the <see cref="ICollector.AcceptsDocsOutOfOrder"/>
		/// property through the <paramref name="acceptsDocsOutOfOrder"/> parameter.
		/// Simple example:
		/// <code>
		///     IndexSearcher searcher = new IndexSearcher(indexReader);
		///     OpenBitSet bits = new OpenBitSet(indexReader.MaxDoc);
		///     int docBase = 0; // must be assigned before the lambdas below capture it
		///     searcher.Search(query,
		///         Collector.NewAnonymous(setScorer: (scorer) =>
		///         {
		///             // ignore scorer
		///         }, collect: (doc) =>
		///         {
		///             bits.Set(doc + docBase);
		///         }, setNextReader: (context) =>
		///         {
		///             docBase = context.DocBase;
		///         }, acceptsDocsOutOfOrder: () =>
		///         {
		///             return true;
		///         })
		///     );
		/// </code>
		/// </summary>
		/// <param name="setScorer">
		/// A delegate method that represents (is called by) the <see cref="ICollector.SetScorer(Scorer)"/>
		/// method. It accepts a <see cref="Scorer"/> scorer and
		/// has no return value.
		/// </param>
		/// <param name="collect">
		/// A delegate method that represents (is called by) the <see cref="ICollector.Collect(int)"/>
		/// method. It accepts an <see cref="int"/> doc and
		/// has no return value.
		/// </param>
		/// <param name="setNextReader">
		/// A delegate method that represents (is called by) the <see cref="ICollector.SetNextReader(AtomicReaderContext)"/>
		/// method. It accepts a <see cref="AtomicReaderContext"/> context and
		/// has no return value.
		/// </param>
		/// <param name="acceptsDocsOutOfOrder">
		/// A delegate method that represents (is called by) the <see cref="ICollector.AcceptsDocsOutOfOrder"/>
		/// property. It returns a <see cref="bool"/> value.
		/// </param>
		/// <returns> A new <see cref="ICollector"/> instance that forwards each member to the matching delegate. </returns>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="setScorer"/>, <paramref name="collect"/>, <paramref name="setNextReader"/>
		/// or <paramref name="acceptsDocsOutOfOrder"/> is <c>null</c>.
		/// </exception>
		public static ICollector NewAnonymous(Action<Scorer> setScorer, Action<int> collect, Action<AtomicReaderContext> setNextReader, Func<bool> acceptsDocsOutOfOrder)
		{
			return new AnonymousCollector(setScorer, collect, setNextReader, acceptsDocsOutOfOrder);
		}

		// LUCENENET specific
		/// <summary>
		/// <see cref="ICollector"/> implementation whose members simply invoke the delegates
		/// supplied at construction time.
		/// </summary>
		private sealed class AnonymousCollector : ICollector
		{
			private readonly Action<Scorer> setScorer;
			private readonly Action<int> collect;
			private readonly Action<AtomicReaderContext> setNextReader;
			private readonly Func<bool> acceptsDocsOutOfOrder;

			/// <summary>
			/// Stores the four delegates after verifying that none of them is <c>null</c>.
			/// </summary>
			/// <exception cref="ArgumentNullException">Any of the delegates is <c>null</c>.</exception>
			public AnonymousCollector(Action<Scorer> setScorer, Action<int> collect, Action<AtomicReaderContext> setNextReader, Func<bool> acceptsDocsOutOfOrder)
			{
				// nameof keeps the reported parameter name in sync with the signature on rename.
				if (setScorer == null)
				{
					throw new ArgumentNullException(nameof(setScorer));
				}
				if (collect == null)
				{
					throw new ArgumentNullException(nameof(collect));
				}
				if (setNextReader == null)
				{
					throw new ArgumentNullException(nameof(setNextReader));
				}
				if (acceptsDocsOutOfOrder == null)
				{
					throw new ArgumentNullException(nameof(acceptsDocsOutOfOrder));
				}

				this.setScorer = setScorer;
				this.collect = collect;
				this.setNextReader = setNextReader;
				this.acceptsDocsOutOfOrder = acceptsDocsOutOfOrder;
			}

			/// <summary>Invokes the <c>acceptsDocsOutOfOrder</c> delegate on every read and returns its result.</summary>
			public bool AcceptsDocsOutOfOrder => this.acceptsDocsOutOfOrder();

			/// <summary>Forwards <paramref name="doc"/> to the <c>collect</c> delegate.</summary>
			public void Collect(int doc)
			{
				this.collect(doc);
			}

			/// <summary>Forwards <paramref name="context"/> to the <c>setNextReader</c> delegate.</summary>
			public void SetNextReader(AtomicReaderContext context)
			{
				this.setNextReader(context);
			}

			/// <summary>Forwards <paramref name="scorer"/> to the <c>setScorer</c> delegate.</summary>
			public void SetScorer(Scorer scorer)
			{
				this.setScorer(scorer);
			}
		}
	}
	}