using System;
namespace Lucene.Net.Search.Similarities
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using BytesRef = Lucene.Net.Util.BytesRef;
using FieldInvertState = Lucene.Net.Index.FieldInvertState;
using NumericDocValues = Lucene.Net.Index.NumericDocValues;
using SmallSingle = Lucene.Net.Util.SmallSingle;
/// <summary>
/// BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker,
/// Susan Jones, Micheline Hancock-Beaulieu, and Mike Gatford. Okapi at TREC-3.
/// In Proceedings of the Third <b>T</b>ext <b>RE</b>trieval <b>C</b>onference (TREC 1994).
/// Gaithersburg, USA, November 1994.
/// <para/>
/// @lucene.experimental
/// </summary>
public class BM25Similarity : Similarity
{
private readonly float k1;
private readonly float b;
// TODO: should we add a delta like sifaka.cs.uiuc.edu/~ylv2/pub/sigir11-bm25l.pdf ?
/// <summary>
/// BM25 with the supplied parameter values. </summary>
/// <param name="k1"> Controls non-linear term frequency normalization (saturation). </param>
/// <param name="b"> Controls to what degree document length normalizes tf values. </param>
public BM25Similarity(float k1, float b)
{
this.k1 = k1;
this.b = b;
}
/// <summary>
/// BM25 with these default values:
/// <list type="bullet">
/// <item><description><c>k1 = 1.2</c>,</description></item>
/// <item><description><c>b = 0.75</c>.</description></item>
/// </list>
/// </summary>
public BM25Similarity()
{
this.k1 = 1.2f;
this.b = 0.75f;
}
/// <summary>
/// Implemented as <c>log(1 + (numDocs - docFreq + 0.5)/(docFreq + 0.5))</c>. </summary>
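/// <remarks>
/// A worked example with illustrative numbers only: for <c>docFreq = 5</c> and <c>numDocs = 100</c>
/// this yields <c>log(1 + 95.5 / 5.5) ≈ log(18.36) ≈ 2.91</c> (natural logarithm), so rarer terms
/// receive a larger idf than common ones.
/// </remarks>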
protected internal virtual float Idf(long docFreq, long numDocs)
{
return (float)Math.Log(1 + (numDocs - docFreq + 0.5D) / (docFreq + 0.5D));
}
/// <summary>
/// Implemented as <c>1 / (distance + 1)</c>. </summary>
protected internal virtual float SloppyFreq(int distance)
{
return 1.0f / (distance + 1);
}
/// <summary>
/// The default implementation returns <c>1</c>. </summary>
protected internal virtual float ScorePayload(int doc, int start, int end, BytesRef payload)
{
return 1;
}
/// <summary>
/// The default implementation computes the average as <c>sumTotalTermFreq / maxDoc</c>,
/// or returns <c>1</c> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
/// or any field that omits frequency information).
/// </summary>
protected internal virtual float AvgFieldLength(CollectionStatistics collectionStats)
{
long sumTotalTermFreq = collectionStats.SumTotalTermFreq;
if (sumTotalTermFreq <= 0)
{
return 1f; // field does not exist, or stat is unsupported
}
else
{
return (float)(sumTotalTermFreq / (double)collectionStats.MaxDoc);
}
}
/// <summary>
/// The default implementation encodes <c>boost / sqrt(length)</c>
/// with <see cref="SmallSingle.SingleToByte315(float)"/>. This is compatible with
/// Lucene's default implementation. If you change this, then you should
/// change <see cref="DecodeNormValue(byte)"/> to match.
/// </summary>
protected internal virtual byte EncodeNormValue(float boost, int fieldLength)
{
return SmallSingle.SingleToByte315(boost / (float)Math.Sqrt(fieldLength));
}
/// <summary>
/// The default implementation returns <c>1 / f<sup>2</sup></c>
/// where <c>f</c> is <see cref="SmallSingle.Byte315ToSingle(byte)"/>.
/// </summary>
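/// <remarks>
/// Together with <see cref="EncodeNormValue(float, int)"/>, the decoded value approximates
/// <c>fieldLength / boost<sup>2</sup></c>. As an illustration (a boost of <c>1.0f</c> is assumed),
/// a field of 64 terms encodes <c>1 / sqrt(64) = 0.125</c> and decodes back to
/// <c>1 / 0.125<sup>2</sup> = 64</c>; because the single-byte encoding is lossy, most lengths are
/// only recovered approximately.
/// </remarks>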
protected internal virtual float DecodeNormValue(byte b)
{
return NORM_TABLE[b & 0xFF];
}
/// <summary>
/// True if overlap tokens (tokens with a position increment of zero) are
/// discounted from the document's length.
/// </summary>
private bool discountOverlaps = true; // LUCENENET specific: made private, since value can be set/get through property
/// <summary>
/// Gets or Sets whether overlap tokens (Tokens with 0 position increment) are
/// ignored when computing norm. By default this is true, meaning overlap
/// tokens do not count when computing norms.
/// </summary>
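/// <remarks>
/// For example, an analysis chain that injects synonyms at the same position produces overlap
/// tokens; leaving this <c>true</c> keeps such tokens from inflating the field length used for
/// length normalization.
/// </remarks>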
public virtual bool DiscountOverlaps
{
get => discountOverlaps;
set => discountOverlaps = value;
}
/// <summary>
/// Cache of decoded bytes. </summary>
private static readonly float[] NORM_TABLE = LoadNormTable();
private static float[] LoadNormTable() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
{
float[] normTable = new float[256];
for (int i = 0; i < 256; i++)
{
float f = SmallSingle.SByte315ToSingle((sbyte)i);
normTable[i] = 1.0f / (f * f);
}
return normTable;
}
public override sealed long ComputeNorm(FieldInvertState state)
{
int numTerms = discountOverlaps ? state.Length - state.NumOverlap : state.Length;
return EncodeNormValue(state.Boost, numTerms);
}
/// <summary>
/// Computes a score factor for a simple term and returns an explanation
/// for that score factor.
///
/// <para/>
/// The default implementation uses:
///
/// <code>
/// Idf(docFreq, searcher.MaxDoc);
/// </code>
///
/// Note that <see cref="CollectionStatistics.MaxDoc"/> is used instead of
/// <see cref="Lucene.Net.Index.IndexReader.NumDocs"/> because this method also uses
/// <see cref="TermStatistics.DocFreq"/>, and when the latter is inaccurate, so is
/// <see cref="CollectionStatistics.MaxDoc"/>, and in the same direction.
/// In addition, <see cref="CollectionStatistics.MaxDoc"/> is more efficient to compute.
/// </summary>
/// <param name="collectionStats"> collection-level statistics </param>
/// <param name="termStats"> term-level statistics for the term </param>
/// <returns> an <see cref="Explanation"/> object that includes both an idf score factor
/// and an explanation for the term. </returns>
public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics termStats)
{
long df = termStats.DocFreq;
long max = collectionStats.MaxDoc;
float idf = Idf(df, max);
return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
/// <summary>
/// Computes a score factor for a phrase.
///
/// <para/>
/// The default implementation sums the idf factor for
/// each term in the phrase.
/// </summary>
/// <param name="collectionStats"> collection-level statistics </param>
/// <param name="termStats"> term-level statistics for the terms in the phrase </param>
/// <returns> an <see cref="Explanation"/> object that includes both an idf
/// score factor for the phrase and an explanation
/// for each term. </returns>
public virtual Explanation IdfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats)
{
long max = collectionStats.MaxDoc;
float idf = 0.0f;
Explanation exp = new Explanation();
exp.Description = "idf(), sum of:";
foreach (TermStatistics stat in termStats)
{
long df = stat.DocFreq;
float termIdf = Idf(df, max);
exp.AddDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.Value = idf;
return exp;
}
public override sealed SimWeight ComputeWeight(float queryBoost, CollectionStatistics collectionStats, params TermStatistics[] termStats)
{
Explanation idf = termStats.Length == 1 ? IdfExplain(collectionStats, termStats[0]) : IdfExplain(collectionStats, termStats);
float avgdl = AvgFieldLength(collectionStats);
// compute freq-independent part of bm25 equation across all norm values
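// (norms are stored as a single byte, so only 256 distinct length-dependent values are possible)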
float[] cache = new float[256];
for (int i = 0; i < cache.Length; i++)
{
cache[i] = k1 * ((1 - b) + b * DecodeNormValue((byte)i) / avgdl);
}
return new BM25Stats(collectionStats.Field, idf, queryBoost, avgdl, cache);
}
public override sealed SimScorer GetSimScorer(SimWeight stats, AtomicReaderContext context)
{
BM25Stats bm25stats = (BM25Stats)stats;
return new BM25DocScorer(this, bm25stats, context.AtomicReader.GetNormValues(bm25stats.Field));
}
private class BM25DocScorer : SimScorer
{
private readonly BM25Similarity outerInstance;
private readonly BM25Stats stats;
private readonly float weightValue; // boost * idf * (k1 + 1)
private readonly NumericDocValues norms;
private readonly float[] cache;
internal BM25DocScorer(BM25Similarity outerInstance, BM25Stats stats, NumericDocValues norms)
{
this.outerInstance = outerInstance;
this.stats = stats;
this.weightValue = stats.Weight * (outerInstance.k1 + 1);
this.cache = stats.Cache;
this.norms = norms;
}
public override float Score(int doc, float freq)
{
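// weightValue folds in boost * idf * (k1 + 1) and the cached norm holds
// k1 * ((1 - b) + b * dl / avgdl), so this evaluates the BM25 formula:
// boost * idf * freq * (k1 + 1) / (freq + k1 * (1 - b + b * dl / avgdl))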
// if there are no norms, we act as if b=0
float norm = norms is null ? outerInstance.k1 : cache[(sbyte)norms.Get(doc) & 0xFF];
return weightValue * freq / (freq + norm);
}
public override Explanation Explain(int doc, Explanation freq)
{
return outerInstance.ExplainScore(doc, freq, stats, norms);
}
public override float ComputeSlopFactor(int distance)
{
return outerInstance.SloppyFreq(distance);
}
public override float ComputePayloadFactor(int doc, int start, int end, BytesRef payload)
{
return outerInstance.ScorePayload(doc, start, end, payload);
}
}
/// <summary>
/// Collection statistics for the BM25 model. </summary>
private class BM25Stats : SimWeight
{
/// <summary>
/// BM25's idf </summary>
internal Explanation Idf { get; private set; }
/// <summary>
/// The average document length. </summary>
internal float Avgdl { get; private set; }
/// <summary>
/// query's inner boost </summary>
internal float QueryBoost { get; private set; }
/// <summary>
/// query's outer boost (only for explain) </summary>
internal float TopLevelBoost { get; set; }
/// <summary>
/// weight (idf * boost) </summary>
internal float Weight { get; set; }
/// <summary>
/// field name, for pulling norms </summary>
internal string Field { get; private set; }
/// <summary>
/// precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) </summary>
internal float[] Cache { get; private set; }
internal BM25Stats(string field, Explanation idf, float queryBoost, float avgdl, float[] cache)
{
this.Field = field;
this.Idf = idf;
this.QueryBoost = queryBoost;
this.Avgdl = avgdl;
this.Cache = cache;
}
public override float GetValueForNormalization()
{
// we return a TF-IDF like normalization to be nice, but we don't actually normalize ourselves.
float queryWeight = Idf.Value * QueryBoost;
return queryWeight * queryWeight;
}
public override void Normalize(float queryNorm, float topLevelBoost)
{
// we don't normalize with queryNorm at all, we just capture the top-level boost
this.TopLevelBoost = topLevelBoost;
this.Weight = Idf.Value * QueryBoost * topLevelBoost;
}
}
private Explanation ExplainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms)
{
Explanation result = new Explanation();
result.Description = "score(doc=" + doc + ",freq=" + freq + "), product of:";
Explanation boostExpl = new Explanation(stats.QueryBoost * stats.TopLevelBoost, "boost");
if (boostExpl.Value != 1.0f)
{
result.AddDetail(boostExpl);
}
result.AddDetail(stats.Idf);
Explanation tfNormExpl = new Explanation();
tfNormExpl.Description = "tfNorm, computed from:";
tfNormExpl.AddDetail(freq);
tfNormExpl.AddDetail(new Explanation(k1, "parameter k1"));
if (norms is null)
{
tfNormExpl.AddDetail(new Explanation(0, "parameter b (norms omitted for field)"));
tfNormExpl.Value = (freq.Value * (k1 + 1)) / (freq.Value + k1);
}
else
{
float doclen = DecodeNormValue((byte)norms.Get(doc));
tfNormExpl.AddDetail(new Explanation(b, "parameter b"));
tfNormExpl.AddDetail(new Explanation(stats.Avgdl, "avgFieldLength"));
tfNormExpl.AddDetail(new Explanation(doclen, "fieldLength"));
tfNormExpl.Value = (freq.Value * (k1 + 1)) / (freq.Value + k1 * (1 - b + b * doclen / stats.Avgdl));
}
result.AddDetail(tfNormExpl);
result.Value = boostExpl.Value * stats.Idf.Value * tfNormExpl.Value;
return result;
}
public override string ToString()
{
return "BM25(k1=" + k1 + ",b=" + b + ")";
}
/// <summary>
/// Returns the <c>k1</c> parameter </summary>
/// <seealso cref="BM25Similarity(float, float)"/>
public virtual float K1 => k1;
/// <summary>
/// Returns the <c>b</c> parameter </summary>
/// <seealso cref="BM25Similarity(float, float)"/>
public virtual float B => b;
}
}