src/Lucene.Net.Highlighter/Highlight/QueryTermScorer.cs - lucenenet - Git at Google

 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Index;
 using System;
 using System.Collections.Generic;
 using JCG = J2N.Collections.Generic;

 namespace Lucene.Net.Search.Highlight
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// <see cref="IScorer"/> implementation which scores text fragments by the number of
     /// unique query terms found. This class uses the <see cref="QueryTermExtractor"/>
     /// class to determine the query terms and their boosts to be used.
     /// </summary>
     // TODO: provide option to boost score of fragments near beginning of document
     // based on fragment.getFragNum()
     public class QueryTermScorer : IScorer
     {
         //private TextFragment currentTextFragment = null; // LUCENENET: Not used

         // Query terms that have already been counted towards the current fragment's score.
         private ISet<string> uniqueTermsInFragment;

         // Sum of the weights of the distinct query terms seen in the current fragment.
         private float totalScore = 0;

         // Largest weight recorded while building termsToFind (never below 0).
         private readonly float maxTermWeight = 0;

         // Term text -> the highest-weighted WeightedTerm supplied for that text.
         private readonly IDictionary<string, WeightedTerm> termsToFind;

         // Term attribute obtained from the token stream passed to Init.
         private ICharTermAttribute termAtt;

         /// <summary>
         /// Creates a scorer from the terms that <see cref="QueryTermExtractor"/> extracts
         /// from <paramref name="query"/>.
         /// </summary>
         /// <param name="query">
         /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
         /// being passed to this class and the searcher)
         /// </param>
         public QueryTermScorer(Query query)
             : this(QueryTermExtractor.GetTerms(query))
         {
         }

         /// <summary>
         /// Creates a scorer from the terms that <see cref="QueryTermExtractor"/> extracts
         /// from <paramref name="query"/> for the given field.
         /// </summary>
         /// <param name="query">
         /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
         /// being passed to this class and the searcher)
         /// </param>
         /// <param name="fieldName">the Field name which is used to match Query terms</param>
         public QueryTermScorer(Query query, string fieldName)
             : this(QueryTermExtractor.GetTerms(query, false, fieldName))
         {
         }

         /// <summary>
         /// Creates a scorer from the IDF-weighted terms that <see cref="QueryTermExtractor"/>
         /// extracts from <paramref name="query"/>.
         /// </summary>
         /// <param name="query">
         /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
         /// being passed to this class and the searcher)
         /// </param>
         /// <param name="reader">
         /// used to compute IDF which can be used to a) score selected
         /// fragments better b) use graded highlights eg set font color
         /// intensity
         /// </param>
         /// <param name="fieldName">
         /// the field on which Inverse Document Frequency (IDF)
         /// calculations are based
         /// </param>
         public QueryTermScorer(Query query, IndexReader reader, string fieldName)
             : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
         {
         }

         /// <summary>
         /// Creates a scorer from an explicit list of weighted terms. When the same term text
         /// occurs more than once, only the entry with the highest weight is kept.
         /// </summary>
         /// <param name="weightedTerms">the terms (and their weights) to look for</param>
         /// <exception cref="ArgumentNullException"><paramref name="weightedTerms"/> is <c>null</c>.</exception>
         public QueryTermScorer(WeightedTerm[] weightedTerms)
         {
             // Fail with a descriptive exception instead of a NullReferenceException in the loop.
             if (weightedTerms is null)
             {
                 throw new ArgumentNullException(nameof(weightedTerms));
             }

             termsToFind = new Dictionary<string, WeightedTerm>();
             foreach (WeightedTerm candidate in weightedTerms)
             {
                 if (!termsToFind.TryGetValue(candidate.Term, out WeightedTerm existingTerm)
                     || existingTerm is null
                     || existingTerm.Weight < candidate.Weight)
                 {
                     // if a term is defined more than once, always use the highest scoring
                     // Weight
                     termsToFind[candidate.Term] = candidate;
                     maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
                 }
             }
         }

         /// <summary>
         /// Obtains the <see cref="ICharTermAttribute"/> of <paramref name="tokenStream"/> so that
         /// <see cref="GetTokenScore()"/> can read the text of the current token.
         /// <seealso cref="IScorer.Init(TokenStream)"/>
         /// </summary>
         /// <param name="tokenStream">the stream whose tokens will be scored</param>
         /// <returns>always <c>null</c>; this scorer does not supply a replacement stream</returns>
         public virtual TokenStream Init(TokenStream tokenStream)
         {
             termAtt = tokenStream.AddAttribute<ICharTermAttribute>();
             return null;
         }

         /// <summary>
         /// Resets the per-fragment state: the set of terms already seen is replaced by an
         /// empty one and the fragment score is set back to zero.
         /// </summary>
         /// <param name="newFragment">the fragment about to be scored (not used by this class)</param>
         public virtual void StartFragment(TextFragment newFragment)
         {
             uniqueTermsInFragment = new JCG.HashSet<string>();
             //currentTextFragment = newFragment; // LUCENENET: Not used
             totalScore = 0;
         }

         /// <summary>
         /// Scores the current token and, the first time a query term is seen in the current
         /// fragment, adds its weight to the fragment score.
         /// <seealso cref="IScorer.GetTokenScore()"/>
         /// </summary>
         /// <returns>the weight of the matching query term, or 0 if the token is not a query term</returns>
         public virtual float GetTokenScore()
         {
             string termText = termAtt.ToString();

             if (!termsToFind.TryGetValue(termText, out WeightedTerm queryTerm) || queryTerm is null)
             {
                 // not a query term - return
                 return 0;
             }

             // found a query term - is it unique in this fragment?
             // Add returns true only when the term was not already in the set, so a single
             // lookup both tests and records it.
             if (uniqueTermsInFragment.Add(termText))
             {
                 totalScore += queryTerm.Weight;
             }
             return queryTerm.Weight;
         }

         /// <summary>
         /// The score accumulated for the current fragment since the last call to
         /// <see cref="StartFragment(TextFragment)"/>.
         /// <seealso cref="IScorer.FragmentScore"/>
         /// </summary>
         public virtual float FragmentScore => totalScore;

         /// <summary>
         /// Does nothing.
         /// </summary>
         public virtual void AllFragmentsProcessed()
         {
             // this class has no special operations to perform at end of processing
         }

         /// <summary>
         /// The weight of the highest weighted term (useful for passing to
         /// <see cref="GradientFormatter"/> to set top end of coloring scale).
         /// </summary>
         public virtual float MaxTermWeight => maxTermWeight;
     }
 }
	using Lucene.Net.Analysis;
	using Lucene.Net.Analysis.TokenAttributes;
	using Lucene.Net.Index;
	using System;
	using System.Collections.Generic;
	using JCG = J2N.Collections.Generic;

	namespace Lucene.Net.Search.Highlight
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// <see cref="IScorer"/> implementation which scores text fragments by the number of
	/// unique query terms found. This class uses the <see cref="QueryTermExtractor"/>
	/// class to determine the query terms and their boosts to be used.
	/// </summary>
	// TODO: provide option to boost score of fragments near beginning of document
	// based on fragment.getFragNum()
	public class QueryTermScorer : IScorer
	{
	    //private TextFragment currentTextFragment = null; // LUCENENET: Not used

	    // Query terms that have already been counted towards the current fragment's score.
	    private ISet<string> uniqueTermsInFragment;

	    // Sum of the weights of the distinct query terms seen in the current fragment.
	    private float totalScore = 0;

	    // Largest weight recorded while building termsToFind (never below 0).
	    private readonly float maxTermWeight = 0;

	    // Term text -> the highest-weighted WeightedTerm supplied for that text.
	    private readonly IDictionary<string, WeightedTerm> termsToFind;

	    // Term attribute obtained from the token stream passed to Init.
	    private ICharTermAttribute termAtt;

	    /// <summary>
	    /// Creates a scorer from the terms that <see cref="QueryTermExtractor"/> extracts
	    /// from <paramref name="query"/>.
	    /// </summary>
	    /// <param name="query">
	    /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
	    /// being passed to this class and the searcher)
	    /// </param>
	    public QueryTermScorer(Query query)
	        : this(QueryTermExtractor.GetTerms(query))
	    {
	    }

	    /// <summary>
	    /// Creates a scorer from the terms that <see cref="QueryTermExtractor"/> extracts
	    /// from <paramref name="query"/> for the given field.
	    /// </summary>
	    /// <param name="query">
	    /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
	    /// being passed to this class and the searcher)
	    /// </param>
	    /// <param name="fieldName">the Field name which is used to match Query terms</param>
	    public QueryTermScorer(Query query, string fieldName)
	        : this(QueryTermExtractor.GetTerms(query, false, fieldName))
	    {
	    }

	    /// <summary>
	    /// Creates a scorer from the IDF-weighted terms that <see cref="QueryTermExtractor"/>
	    /// extracts from <paramref name="query"/>.
	    /// </summary>
	    /// <param name="query">
	    /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
	    /// being passed to this class and the searcher)
	    /// </param>
	    /// <param name="reader">
	    /// used to compute IDF which can be used to a) score selected
	    /// fragments better b) use graded highlights eg set font color
	    /// intensity
	    /// </param>
	    /// <param name="fieldName">
	    /// the field on which Inverse Document Frequency (IDF)
	    /// calculations are based
	    /// </param>
	    public QueryTermScorer(Query query, IndexReader reader, string fieldName)
	        : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
	    {
	    }

	    /// <summary>
	    /// Creates a scorer from an explicit list of weighted terms. When the same term text
	    /// occurs more than once, only the entry with the highest weight is kept.
	    /// </summary>
	    /// <param name="weightedTerms">the terms (and their weights) to look for</param>
	    /// <exception cref="ArgumentNullException"><paramref name="weightedTerms"/> is <c>null</c>.</exception>
	    public QueryTermScorer(WeightedTerm[] weightedTerms)
	    {
	        // Fail with a descriptive exception instead of a NullReferenceException in the loop.
	        if (weightedTerms is null)
	        {
	            throw new ArgumentNullException(nameof(weightedTerms));
	        }

	        termsToFind = new Dictionary<string, WeightedTerm>();
	        foreach (WeightedTerm candidate in weightedTerms)
	        {
	            if (!termsToFind.TryGetValue(candidate.Term, out WeightedTerm existingTerm)
	                || existingTerm is null
	                || existingTerm.Weight < candidate.Weight)
	            {
	                // if a term is defined more than once, always use the highest scoring
	                // Weight
	                termsToFind[candidate.Term] = candidate;
	                maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
	            }
	        }
	    }

	    /// <summary>
	    /// Obtains the <see cref="ICharTermAttribute"/> of <paramref name="tokenStream"/> so that
	    /// <see cref="GetTokenScore()"/> can read the text of the current token.
	    /// <seealso cref="IScorer.Init(TokenStream)"/>
	    /// </summary>
	    /// <param name="tokenStream">the stream whose tokens will be scored</param>
	    /// <returns>always <c>null</c>; this scorer does not supply a replacement stream</returns>
	    public virtual TokenStream Init(TokenStream tokenStream)
	    {
	        termAtt = tokenStream.AddAttribute<ICharTermAttribute>();
	        return null;
	    }

	    /// <summary>
	    /// Resets the per-fragment state: the set of terms already seen is replaced by an
	    /// empty one and the fragment score is set back to zero.
	    /// </summary>
	    /// <param name="newFragment">the fragment about to be scored (not used by this class)</param>
	    public virtual void StartFragment(TextFragment newFragment)
	    {
	        uniqueTermsInFragment = new JCG.HashSet<string>();
	        //currentTextFragment = newFragment; // LUCENENET: Not used
	        totalScore = 0;
	    }

	    /// <summary>
	    /// Scores the current token and, the first time a query term is seen in the current
	    /// fragment, adds its weight to the fragment score.
	    /// <seealso cref="IScorer.GetTokenScore()"/>
	    /// </summary>
	    /// <returns>the weight of the matching query term, or 0 if the token is not a query term</returns>
	    public virtual float GetTokenScore()
	    {
	        string termText = termAtt.ToString();

	        if (!termsToFind.TryGetValue(termText, out WeightedTerm queryTerm) || queryTerm is null)
	        {
	            // not a query term - return
	            return 0;
	        }

	        // found a query term - is it unique in this fragment?
	        // Add returns true only when the term was not already in the set, so a single
	        // lookup both tests and records it.
	        if (uniqueTermsInFragment.Add(termText))
	        {
	            totalScore += queryTerm.Weight;
	        }
	        return queryTerm.Weight;
	    }

	    /// <summary>
	    /// The score accumulated for the current fragment since the last call to
	    /// <see cref="StartFragment(TextFragment)"/>.
	    /// <seealso cref="IScorer.FragmentScore"/>
	    /// </summary>
	    public virtual float FragmentScore => totalScore;

	    /// <summary>
	    /// Does nothing.
	    /// </summary>
	    public virtual void AllFragmentsProcessed()
	    {
	        // this class has no special operations to perform at end of processing
	    }

	    /// <summary>
	    /// The weight of the highest weighted term (useful for passing to
	    /// <see cref="GradientFormatter"/> to set top end of coloring scale).
	    /// </summary>
	    public virtual float MaxTermWeight => maxTermWeight;
	}
	}