blob: 19ff987174ff9e9eb1297a7b537808f3811ce369 [file] [log] [blame]
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Queries.Function;
using Lucene.Net.Queries.Function.ValueSources;
using Lucene.Net.Search;
using Lucene.Net.Search.Similarities;
using Lucene.Net.Store;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
namespace Lucene.Net.Tests.Queries.Function
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
[SuppressCodecs("Lucene3x")]
public class TestLongNormValueSource : LuceneTestCase
{
    internal static Directory dir;
    internal static IndexReader reader;
    internal static IndexSearcher searcher;

    // Similarity that round-trips norms at full float precision, so the
    // expected scores in TestNorm are exact rather than SmallFloat-quantized.
    private static Similarity sim = new PreciseDefaultSimilarity();

    [SetUp]
    public override void SetUp()
    {
        base.SetUp();

        // Build a tiny two-document index over the "text" field, indexed
        // with the precise similarity configured above.
        dir = NewDirectory();
        IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        config.SetMergePolicy(NewLogMergePolicy());
        config.SetSimilarity(sim);

        RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

        var first = new Document();
        first.Add(new TextField("text", "this is a test test test", Field.Store.NO));
        writer.AddDocument(first);

        var second = new Document();
        second.Add(new TextField("text", "second test", Field.Store.NO));
        writer.AddDocument(second);

        reader = writer.GetReader();
        searcher = NewSearcher(reader);
        writer.Dispose();
    }

    [TearDown]
    public override void TearDown()
    {
        // Release resources in reverse order of acquisition.
        searcher = null;
        reader.Dispose();
        reader = null;
        dir.Dispose();
        dir = null;
        base.TearDown();
    }

    [Test]
    public void TestNorm()
    {
        Similarity saved = searcher.Similarity;
        try
        {
            // no norm field (so agnostic to indexed similarity)
            searcher.Similarity = sim;
            var query = new FunctionQuery(new NormValueSource("text"));
            AssertHits(query, new float[] { 0f, 0f });
        }
        finally
        {
            // Restore the searcher's similarity even if the assertion fails.
            searcher.Similarity = saved;
        }
    }

    /// <summary>
    /// Runs <paramref name="q"/> against the shared searcher and verifies that
    /// docs 0..N-1 are returned (sorted by the "id" string field) with exactly
    /// the given <paramref name="scores"/>, then cross-checks hits and
    /// explanations via <see cref="CheckHits"/>.
    /// </summary>
    internal void AssertHits(Query q, float[] scores)
    {
        int count = scores.Length;
        ScoreDoc[] expected = new ScoreDoc[count];
        int[] expectedDocs = new int[count];
        for (int i = 0; i < count; i++)
        {
            // Expected hit i is document i with the caller-supplied score.
            expectedDocs[i] = i;
            expected[i] = new ScoreDoc(i, scores[i]);
        }

        TopDocs docs = searcher.Search(q, 2, new Sort(new SortField("id", SortFieldType.STRING)));

        CheckHits.DoCheckHits(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, q, "", searcher, expectedDocs);
        CheckHits.CheckHitsQuery(q, expected, docs.ScoreDocs, expectedDocs);
        CheckHits.CheckExplanations(q, "", searcher);
    }
}
/// <summary>
/// A <see cref="TFIDFSimilarity"/> identical to DefaultSimilarity except that the
/// norm is stored as the raw 32-bit IEEE-754 pattern of the float (4 bytes, no
/// lossy single-byte SmallFloat compression), so length norms round-trip through
/// the index exactly.
/// </summary>
internal class PreciseDefaultSimilarity : TFIDFSimilarity
{
    /// <summary>
    /// True if overlap tokens (tokens with a position increment of zero) are
    /// discounted from the document's length. Defaults to <c>true</c>.
    /// </summary>
    protected internal bool discountOverlaps = true;

    /// <summary>
    /// Sole constructor: parameter-free.
    /// </summary>
    public PreciseDefaultSimilarity()
    {
    }

    /// <summary>
    /// Implemented as <c>overlap / maxOverlap</c>.
    /// </summary>
    public override float Coord(int overlap, int maxOverlap)
        => overlap / (float)maxOverlap;

    /// <summary>
    /// Implemented as <c>1 / sqrt(sumOfSquaredWeights)</c>.
    /// </summary>
    public override float QueryNorm(float sumOfSquaredWeights)
        => (float)(1.0 / Math.Sqrt(sumOfSquaredWeights));

    /// <summary>
    /// Encodes the norm losslessly: the returned value is the IEEE-754 bit
    /// pattern of <paramref name="f"/> (unlike DefaultSimilarity, which packs
    /// the norm into a single byte and loses precision).
    /// </summary>
    /// <seealso cref="DecodeNormValue(long)"/>
    public override long EncodeNormValue(float f)
        => J2N.BitConversion.SingleToInt32Bits(f);

    /// <summary>
    /// Inverse of <see cref="EncodeNormValue(float)"/>: reinterprets the low
    /// 32 bits of <paramref name="norm"/> as a <see cref="float"/>.
    /// </summary>
    public override float DecodeNormValue(long norm)
        => J2N.BitConversion.Int32BitsToSingle((int)norm);

    /// <summary>
    /// Implemented as <c>state.Boost * (1 / sqrt(numTerms))</c>, where
    /// <c>numTerms</c> is <see cref="FieldInvertState.Length"/>, minus
    /// <see cref="FieldInvertState.NumOverlap"/> when
    /// <see cref="DiscountOverlaps"/> is <c>true</c>.
    /// <para/>
    /// @lucene.experimental
    /// </summary>
    public override float LengthNorm(FieldInvertState state)
    {
        int numTerms = discountOverlaps
            ? state.Length - state.NumOverlap
            : state.Length;
        return state.Boost * ((float)(1.0 / Math.Sqrt(numTerms)));
    }

    /// <summary>
    /// Implemented as <c>sqrt(freq)</c>.
    /// </summary>
    public override float Tf(float freq)
        => (float)Math.Sqrt(freq);

    /// <summary>
    /// Implemented as <c>1 / (distance + 1)</c>.
    /// </summary>
    public override float SloppyFreq(int distance)
        => 1.0f / (distance + 1);

    /// <summary>
    /// The default implementation returns <c>1</c>.
    /// </summary>
    public override float ScorePayload(int doc, int start, int end, BytesRef payload)
        => 1;

    /// <summary>
    /// Implemented as <c>log(numDocs / (docFreq + 1)) + 1</c>.
    /// </summary>
    public override float Idf(long docFreq, long numDocs)
        => (float)(Math.Log(numDocs / (double)(docFreq + 1)) + 1.0);

    /// <summary>
    /// Determines whether overlap tokens (tokens with a position increment of
    /// zero) are ignored when computing the norm. By default this is true,
    /// meaning overlap tokens do not count when computing norms.
    /// <para/>
    /// @lucene.experimental
    /// </summary>
    public virtual bool DiscountOverlaps
    {
        get => discountOverlaps;
        set => discountOverlaps = value;
    }

    public override string ToString()
    {
        // NOTE(review): intentionally reports "DefaultSimilarity" rather than
        // this class's name — presumably so explanation output matches the
        // stock similarity; confirm before changing.
        return "DefaultSimilarity";
    }
}
}