src/Lucene.Net.Tests.Queries/Function/TestLongNormValueSource.cs - lucenenet - Git at Google

 // Lucene version compatibility level 4.8.1
 using Lucene.Net.Analysis;
 using Lucene.Net.Documents;
 using Lucene.Net.Index;
 using Lucene.Net.Index.Extensions;
 using Lucene.Net.Queries.Function;
 using Lucene.Net.Queries.Function.ValueSources;
 using Lucene.Net.Search;
 using Lucene.Net.Search.Similarities;
 using Lucene.Net.Store;
 using Lucene.Net.Util;
 using NUnit.Framework;
 using System;

 namespace Lucene.Net.Tests.Queries.Function
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// Exercises <see cref="NormValueSource"/> through a <see cref="FunctionQuery"/>
     /// on a small index written with <see cref="PreciseDefaultSimilarity"/>.
     /// </summary>
     [SuppressCodecs("Lucene3x")]
     public class TestLongNormValueSource : LuceneTestCase
     {
         // Rebuilt by SetUp() and released by TearDown() around every test.
         internal static Directory dir;
         internal static IndexReader reader;
         internal static IndexSearcher searcher;

         // Shared by the index writer configuration and by the searcher in TestNorm(); never reassigned.
         private static readonly Similarity sim = new PreciseDefaultSimilarity();

         /// <summary>
         /// Creates a new directory, indexes two documents into the <c>"text"</c> field
         /// and opens the reader and searcher used by the tests.
         /// </summary>
         [SetUp]
         public override void SetUp()
         {
             base.SetUp();

             dir = NewDirectory();
             IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
             iwConfig.SetMergePolicy(NewLogMergePolicy());
             iwConfig.SetSimilarity(sim);
             RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConfig);
             try
             {
                 Document doc = new Document();
                 doc.Add(new TextField("text", "this is a test test test", Field.Store.NO));
                 iw.AddDocument(doc);

                 doc = new Document();
                 doc.Add(new TextField("text", "second test", Field.Store.NO));
                 iw.AddDocument(doc);

                 reader = iw.GetReader();
                 searcher = NewSearcher(reader);
             }
             finally
             {
                 // Release the writer even when indexing or opening the reader fails.
                 iw.Dispose();
             }
         }

         /// <summary>
         /// Disposes the reader and the directory and clears the static fields.
         /// The base class tear-down runs even if one of the disposals throws.
         /// </summary>
         [TearDown]
         public override void TearDown()
         {
             try
             {
                 searcher = null;
                 try
                 {
                     reader?.Dispose();
                 }
                 finally
                 {
                     // Still release the directory if disposing the reader failed.
                     reader = null;
                     dir?.Dispose();
                     dir = null;
                 }
             }
             finally
             {
                 base.TearDown();
             }
         }

         /// <summary>
         /// Runs a <see cref="FunctionQuery"/> over <c>NormValueSource("text")</c> with the
         /// searcher temporarily switched to <see cref="sim"/>, expecting a score of
         /// <c>0f</c> for both documents.
         /// </summary>
         [Test]
         public void TestNorm()
         {
             Similarity saved = searcher.Similarity;
             try
             {
                 // no norm field (so agnostic to indexed similarity)
                 searcher.Similarity = sim;
                 AssertHits(new FunctionQuery(
                     new NormValueSource("text")),
                     new float[] { 0f, 0f });
             }
             finally
             {
                 // Put back whatever similarity the searcher had before the test.
                 searcher.Similarity = saved;
             }
         }

         /// <summary>
         /// Searches with <paramref name="q"/> and hands the result to the
         /// <see cref="CheckHits"/> helpers, expecting document <c>i</c> to carry
         /// <c>scores[i]</c>.
         /// </summary>
         /// <param name="q">Query to execute against <see cref="searcher"/>.</param>
         /// <param name="scores">Expected score per document id, indexed by document id.</param>
         internal void AssertHits(Query q, float[] scores)
         {
             ScoreDoc[] expected = new ScoreDoc[scores.Length];
             int[] expectedDocs = new int[scores.Length];
             for (int i = 0; i < expected.Length; i++)
             {
                 expectedDocs[i] = i;
                 expected[i] = new ScoreDoc(i, scores[i]);
             }
             TopDocs docs = searcher.Search(q, 2, new Sort(new SortField("id", SortFieldType.STRING)));

             // Debugging aid: uncomment to print the explanation of each hit.
             // foreach (ScoreDoc hit in docs.ScoreDocs)
             // {
             //     Console.WriteLine(searcher.Explain(q, hit.Doc));
             // }

             CheckHits.DoCheckHits(
 #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                 this,
 #endif
                 Random, q, "", searcher, expectedDocs);
             CheckHits.CheckHitsQuery(q, expected, docs.ScoreDocs, expectedDocs);
             CheckHits.CheckExplanations(q, "", searcher);
         }
     }

     /// <summary>
     /// A <see cref="TFIDFSimilarity"/> that encodes each norm as the 32-bit
     /// pattern of a float instead of a compact lossy value.
     /// </summary>
     internal class PreciseDefaultSimilarity : TFIDFSimilarity
     {
         /// <summary>
         /// Backing field for <see cref="DiscountOverlaps"/>; <c>true</c> unless changed.
         /// </summary>
         protected internal bool discountOverlaps = true;

         /// <summary>
         /// Creates the similarity; there is nothing to configure.
         /// </summary>
         public PreciseDefaultSimilarity()
         {
         }

         /// <summary>
         /// Gets or sets whether <see cref="LengthNorm(FieldInvertState)"/> subtracts
         /// <see cref="FieldInvertState.NumOverlap"/> from the field length before
         /// computing the norm. Defaults to <c>true</c>.
         /// <para/>
         /// @lucene.experimental
         /// </summary>
         public virtual bool DiscountOverlaps
         {
             get => discountOverlaps;
             set => discountOverlaps = value;
         }

         /// <summary>
         /// Returns <c>overlap / maxOverlap</c> using floating-point division.
         /// </summary>
         public override float Coord(int overlap, int maxOverlap)
             => overlap / (float)maxOverlap;

         /// <summary>
         /// Returns <c>1 / sqrt(sumOfSquaredWeights)</c>.
         /// </summary>
         public override float QueryNorm(float sumOfSquaredWeights)
             => (float)(1.0 / Math.Sqrt(sumOfSquaredWeights));

         /// <summary>
         /// Encodes a norm by converting <paramref name="f"/> with
         /// <c>J2N.BitConversion.SingleToInt32Bits</c>; the resulting 32-bit
         /// value is widened to <see cref="long"/>.
         /// </summary>
         /// <seealso cref="DecodeNormValue(long)"/>
         public override long EncodeNormValue(float f)
             => J2N.BitConversion.SingleToInt32Bits(f);

         /// <summary>
         /// Decodes a norm by narrowing <paramref name="norm"/> to 32 bits and
         /// converting it with <c>J2N.BitConversion.Int32BitsToSingle</c>.
         /// </summary>
         /// <seealso cref="EncodeNormValue(float)"/>
         public override float DecodeNormValue(long norm)
             => J2N.BitConversion.Int32BitsToSingle((int)norm);

         /// <summary>
         /// Returns <c>state.Boost * (1 / sqrt(numTerms))</c>, where <c>numTerms</c> is
         /// <see cref="FieldInvertState.Length"/> minus
         /// <see cref="FieldInvertState.NumOverlap"/> when
         /// <see cref="DiscountOverlaps"/> is <c>true</c>, and plain
         /// <see cref="FieldInvertState.Length"/> otherwise.
         /// <para/>
         /// @lucene.experimental
         /// </summary>
         public override float LengthNorm(FieldInvertState state)
         {
             int termCount = discountOverlaps
                 ? state.Length - state.NumOverlap
                 : state.Length;

             return state.Boost * ((float)(1.0 / Math.Sqrt(termCount)));
         }

         /// <summary>
         /// Returns <c>sqrt(freq)</c>.
         /// </summary>
         public override float Tf(float freq)
             => (float)Math.Sqrt(freq);

         /// <summary>
         /// Returns <c>1 / (distance + 1)</c>.
         /// </summary>
         public override float SloppyFreq(int distance)
             => 1.0f / (distance + 1);

         /// <summary>
         /// Always returns <c>1</c>; none of the arguments are used.
         /// </summary>
         public override float ScorePayload(int doc, int start, int end, BytesRef payload)
             => 1;

         /// <summary>
         /// Returns <c>log(numDocs / (docFreq + 1)) + 1</c>.
         /// </summary>
         public override float Idf(long docFreq, long numDocs)
             => (float)(Math.Log(numDocs / (double)(docFreq + 1)) + 1.0);

         /// <summary>
         /// Returns the fixed name <c>"DefaultSimilarity"</c>.
         /// </summary>
         public override string ToString()
             => "DefaultSimilarity";
     }
 }
	// Lucene version compatibility level 4.8.1
	using Lucene.Net.Analysis;
	using Lucene.Net.Documents;
	using Lucene.Net.Index;
	using Lucene.Net.Index.Extensions;
	using Lucene.Net.Queries.Function;
	using Lucene.Net.Queries.Function.ValueSources;
	using Lucene.Net.Search;
	using Lucene.Net.Search.Similarities;
	using Lucene.Net.Store;
	using Lucene.Net.Util;
	using NUnit.Framework;
	using System;

	namespace Lucene.Net.Tests.Queries.Function
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// Exercises <see cref="NormValueSource"/> through a <see cref="FunctionQuery"/>
	/// on a small index written with <see cref="PreciseDefaultSimilarity"/>.
	/// </summary>
	[SuppressCodecs("Lucene3x")]
	public class TestLongNormValueSource : LuceneTestCase
	{
	    // Rebuilt by SetUp() and released by TearDown() around every test.
	    internal static Directory dir;
	    internal static IndexReader reader;
	    internal static IndexSearcher searcher;

	    // Shared by the index writer configuration and by the searcher in TestNorm(); never reassigned.
	    private static readonly Similarity sim = new PreciseDefaultSimilarity();

	    /// <summary>
	    /// Creates a new directory, indexes two documents into the <c>"text"</c> field
	    /// and opens the reader and searcher used by the tests.
	    /// </summary>
	    [SetUp]
	    public override void SetUp()
	    {
	        base.SetUp();

	        dir = NewDirectory();
	        IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
	        iwConfig.SetMergePolicy(NewLogMergePolicy());
	        iwConfig.SetSimilarity(sim);
	        RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwConfig);
	        try
	        {
	            Document doc = new Document();
	            doc.Add(new TextField("text", "this is a test test test", Field.Store.NO));
	            iw.AddDocument(doc);

	            doc = new Document();
	            doc.Add(new TextField("text", "second test", Field.Store.NO));
	            iw.AddDocument(doc);

	            reader = iw.GetReader();
	            searcher = NewSearcher(reader);
	        }
	        finally
	        {
	            // Release the writer even when indexing or opening the reader fails.
	            iw.Dispose();
	        }
	    }

	    /// <summary>
	    /// Disposes the reader and the directory and clears the static fields.
	    /// The base class tear-down runs even if one of the disposals throws.
	    /// </summary>
	    [TearDown]
	    public override void TearDown()
	    {
	        try
	        {
	            searcher = null;
	            try
	            {
	                reader?.Dispose();
	            }
	            finally
	            {
	                // Still release the directory if disposing the reader failed.
	                reader = null;
	                dir?.Dispose();
	                dir = null;
	            }
	        }
	        finally
	        {
	            base.TearDown();
	        }
	    }

	    /// <summary>
	    /// Runs a <see cref="FunctionQuery"/> over <c>NormValueSource("text")</c> with the
	    /// searcher temporarily switched to <see cref="sim"/>, expecting a score of
	    /// <c>0f</c> for both documents.
	    /// </summary>
	    [Test]
	    public void TestNorm()
	    {
	        Similarity saved = searcher.Similarity;
	        try
	        {
	            // no norm field (so agnostic to indexed similarity)
	            searcher.Similarity = sim;
	            AssertHits(new FunctionQuery(
	                new NormValueSource("text")),
	                new float[] { 0f, 0f });
	        }
	        finally
	        {
	            // Put back whatever similarity the searcher had before the test.
	            searcher.Similarity = saved;
	        }
	    }

	    /// <summary>
	    /// Searches with <paramref name="q"/> and hands the result to the
	    /// <see cref="CheckHits"/> helpers, expecting document <c>i</c> to carry
	    /// <c>scores[i]</c>.
	    /// </summary>
	    /// <param name="q">Query to execute against <see cref="searcher"/>.</param>
	    /// <param name="scores">Expected score per document id, indexed by document id.</param>
	    internal void AssertHits(Query q, float[] scores)
	    {
	        ScoreDoc[] expected = new ScoreDoc[scores.Length];
	        int[] expectedDocs = new int[scores.Length];
	        for (int i = 0; i < expected.Length; i++)
	        {
	            expectedDocs[i] = i;
	            expected[i] = new ScoreDoc(i, scores[i]);
	        }
	        TopDocs docs = searcher.Search(q, 2, new Sort(new SortField("id", SortFieldType.STRING)));

	        // Debugging aid: uncomment to print the explanation of each hit.
	        // foreach (ScoreDoc hit in docs.ScoreDocs)
	        // {
	        //     Console.WriteLine(searcher.Explain(q, hit.Doc));
	        // }

	        CheckHits.DoCheckHits(
	#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
	            this,
	#endif
	            Random, q, "", searcher, expectedDocs);
	        CheckHits.CheckHitsQuery(q, expected, docs.ScoreDocs, expectedDocs);
	        CheckHits.CheckExplanations(q, "", searcher);
	    }
	}

	/// <summary>
	/// A <see cref="TFIDFSimilarity"/> that encodes each norm as the 32-bit
	/// pattern of a float instead of a compact lossy value.
	/// </summary>
	internal class PreciseDefaultSimilarity : TFIDFSimilarity
	{
	    /// <summary>
	    /// Backing field for <see cref="DiscountOverlaps"/>; <c>true</c> unless changed.
	    /// </summary>
	    protected internal bool discountOverlaps = true;

	    /// <summary>
	    /// Creates the similarity; there is nothing to configure.
	    /// </summary>
	    public PreciseDefaultSimilarity()
	    {
	    }

	    /// <summary>
	    /// Gets or sets whether <see cref="LengthNorm(FieldInvertState)"/> subtracts
	    /// <see cref="FieldInvertState.NumOverlap"/> from the field length before
	    /// computing the norm. Defaults to <c>true</c>.
	    /// <para/>
	    /// @lucene.experimental
	    /// </summary>
	    public virtual bool DiscountOverlaps
	    {
	        get => discountOverlaps;
	        set => discountOverlaps = value;
	    }

	    /// <summary>
	    /// Returns <c>overlap / maxOverlap</c> using floating-point division.
	    /// </summary>
	    public override float Coord(int overlap, int maxOverlap)
	        => overlap / (float)maxOverlap;

	    /// <summary>
	    /// Returns <c>1 / sqrt(sumOfSquaredWeights)</c>.
	    /// </summary>
	    public override float QueryNorm(float sumOfSquaredWeights)
	        => (float)(1.0 / Math.Sqrt(sumOfSquaredWeights));

	    /// <summary>
	    /// Encodes a norm by converting <paramref name="f"/> with
	    /// <c>J2N.BitConversion.SingleToInt32Bits</c>; the resulting 32-bit
	    /// value is widened to <see cref="long"/>.
	    /// </summary>
	    /// <seealso cref="DecodeNormValue(long)"/>
	    public override long EncodeNormValue(float f)
	        => J2N.BitConversion.SingleToInt32Bits(f);

	    /// <summary>
	    /// Decodes a norm by narrowing <paramref name="norm"/> to 32 bits and
	    /// converting it with <c>J2N.BitConversion.Int32BitsToSingle</c>.
	    /// </summary>
	    /// <seealso cref="EncodeNormValue(float)"/>
	    public override float DecodeNormValue(long norm)
	        => J2N.BitConversion.Int32BitsToSingle((int)norm);

	    /// <summary>
	    /// Returns <c>state.Boost * (1 / sqrt(numTerms))</c>, where <c>numTerms</c> is
	    /// <see cref="FieldInvertState.Length"/> minus
	    /// <see cref="FieldInvertState.NumOverlap"/> when
	    /// <see cref="DiscountOverlaps"/> is <c>true</c>, and plain
	    /// <see cref="FieldInvertState.Length"/> otherwise.
	    /// <para/>
	    /// @lucene.experimental
	    /// </summary>
	    public override float LengthNorm(FieldInvertState state)
	    {
	        int termCount = discountOverlaps
	            ? state.Length - state.NumOverlap
	            : state.Length;

	        return state.Boost * ((float)(1.0 / Math.Sqrt(termCount)));
	    }

	    /// <summary>
	    /// Returns <c>sqrt(freq)</c>.
	    /// </summary>
	    public override float Tf(float freq)
	        => (float)Math.Sqrt(freq);

	    /// <summary>
	    /// Returns <c>1 / (distance + 1)</c>.
	    /// </summary>
	    public override float SloppyFreq(int distance)
	        => 1.0f / (distance + 1);

	    /// <summary>
	    /// Always returns <c>1</c>; none of the arguments are used.
	    /// </summary>
	    public override float ScorePayload(int doc, int start, int end, BytesRef payload)
	        => 1;

	    /// <summary>
	    /// Returns <c>log(numDocs / (docFreq + 1)) + 1</c>.
	    /// </summary>
	    public override float Idf(long docFreq, long numDocs)
	        => (float)(Math.Log(numDocs / (double)(docFreq + 1)) + 1.0);

	    /// <summary>
	    /// Returns the fixed name <c>"DefaultSimilarity"</c>.
	    /// </summary>
	    public override string ToString()
	        => "DefaultSimilarity";
	}
	}