using System;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Queries.Function;
using Lucene.Net.Queries.Function.ValueSources;
using Lucene.Net.Search;
using Lucene.Net.Search.Similarities;
using Lucene.Net.Store;
using Lucene.Net.Util;
using NUnit.Framework;

namespace Lucene.Net.Tests.Queries.Function
{
    // [Util.LuceneTestCase.SuppressCodecs("Lucene3x")]
    public class TestLongNormValueSource : LuceneTestCase
    {
        internal static Directory dir;
        internal static IndexReader reader;
        internal static IndexSearcher searcher;
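
        // Norms for the "text" field are indexed with a lossless similarity
        // (see PreciseDefaultSimilarity below), so the test can assert exact,
        // unquantized norm values rather than one-byte approximations.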
        private static Similarity sim = new PreciseDefaultSimilarity();

        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwConfig.SetMergePolicy(NewLogMergePolicy());
            iwConfig.SetSimilarity(sim);
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConfig);

            Document doc = new Document();
            doc.Add(new TextField("text", "this is a test test test", Field.Store.NO));
            iw.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("text", "second test", Field.Store.NO));
            iw.AddDocument(doc);

            reader = iw.Reader;
            searcher = NewSearcher(reader);
            iw.Dispose();
        }

        [TearDown]
        public override void TearDown()
        {
            searcher = null;
            reader.Dispose();
            reader = null;
            dir.Dispose();
            dir = null;
            base.TearDown();
        }

        [Test]
        public void TestNorm()
        {
            Similarity saved = searcher.Similarity;
            try
            {
                // no norm field (so agnostic to indexed similarity)
                searcher.Similarity = sim;
                AssertHits(new FunctionQuery(new NormValueSource("text")), new float[] { 0f, 0f });
            }
            finally
            {
                searcher.Similarity = saved;
            }
        }

        protected virtual void AssertHits(Query q, float[] scores)
        {
            ScoreDoc[] expected = new ScoreDoc[scores.Length];
            int[] expectedDocs = new int[scores.Length];
            for (int i = 0; i < expected.Length; i++)
            {
                expectedDocs[i] = i;
                expected[i] = new ScoreDoc(i, scores[i]);
            }
            TopDocs docs = searcher.Search(q, 2, new Sort(new SortField("id", SortField.Type_e.STRING)));
            /*
            for (int i = 0; i < docs.ScoreDocs.Length; i++)
            {
                Console.WriteLine(searcher.Explain(q, docs.ScoreDocs[i].Doc));
            }
            */
            CheckHits.DoCheckHits(Random(), q, "", searcher, expectedDocs, Similarity);
            CheckHits.CheckHitsQuery(q, expected, docs.ScoreDocs, expectedDocs);
            CheckHits.CheckExplanations(q, "", searcher);
        }
    }

    /// <summary>
    /// Encodes the norm losslessly, with full float precision. </summary>
    internal class PreciseDefaultSimilarity : TFIDFSimilarity
    {
        /// <summary>
        /// Sole constructor: parameter-free </summary>
        public PreciseDefaultSimilarity()
        {
        }

        /// <summary>
        /// Implemented as <code>overlap / maxOverlap</code>. </summary>
        public override float Coord(int overlap, int maxOverlap)
        {
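            // e.g. a query matching 2 of 3 clauses in a document yields a coord factor of 2/3f.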
            return overlap / (float)maxOverlap;
        }

        /// <summary>
        /// Implemented as <code>1/sqrt(sumOfSquaredWeights)</code>. </summary>
        public override float QueryNorm(float sumOfSquaredWeights)
        {
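            // e.g. sumOfSquaredWeights = 4 gives a query norm of 1/sqrt(4) = 0.5f.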
            return (float)(1.0 / Math.Sqrt(sumOfSquaredWeights));
        }

        /// <summary>
        /// Encodes a normalization factor for storage in an index.
        /// <para/>
        /// Unlike <c>DefaultSimilarity</c>, which quantizes the norm into a single
        /// byte via <c>SmallFloat</c> (a three-bit mantissa and a five-bit exponent,
        /// giving about one significant decimal digit of accuracy), this
        /// implementation stores the full bit pattern of the value, so nothing is
        /// lost on the round trip through the index.
        /// </summary>
        /// <seealso cref="Lucene.Net.Documents.Field.Boost"/>
        /// <seealso cref="DecodeNormValue(long)"/>
        public override long EncodeNormValue(float f)
        {
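            // The float is widened to double and its raw IEEE-754 bits are stored;
            // DecodeNormValue reverses this exactly, so no precision is lost.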
            return BitConverter.DoubleToInt64Bits(f);
        }

        /// <summary>
        /// Decodes a norm value written by <see cref="EncodeNormValue(float)"/>,
        /// recovering the original value exactly.
        /// </summary>
        /// <seealso cref="EncodeNormValue(float)"/>
        public override float DecodeNormValue(long norm)
        {
            return (float)BitConverter.Int64BitsToDouble(norm);
        }

        /// <summary>
        /// Implemented as <code>state.Boost * (1 / sqrt(numTerms))</code>, where
        /// <code>numTerms</code> is <see cref="FieldInvertState.Length"/> if
        /// <see cref="DiscountOverlaps"/> is <c>false</c>, else
        /// <see cref="FieldInvertState.Length"/> - <see cref="FieldInvertState.NumOverlap"/>.
        ///
        /// @lucene.experimental
        /// </summary>
        public override float LengthNorm(FieldInvertState state)
        {
            int numTerms;
            if (discountOverlaps)
            {
                numTerms = state.Length - state.NumOverlap;
            }
            else
            {
                numTerms = state.Length;
            }
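
            // e.g. for the documents indexed above (boost 1.0): "this is a test test test"
            // has 6 terms -> 1/sqrt(6) ~= 0.408f, and "second test" -> 1/sqrt(2) ~= 0.707f.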
            return state.Boost * ((float)(1.0 / Math.Sqrt(numTerms)));
        }

        /// <summary>
        /// Implemented as <code>sqrt(freq)</code>. </summary>
        public override float Tf(float freq)
        {
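            // e.g. a term occurring 4 times in a document scores tf = sqrt(4) = 2.0f.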
            return (float)Math.Sqrt(freq);
        }

        /// <summary>
        /// Implemented as <code>1 / (distance + 1)</code>.
        /// </summary>
        public override float SloppyFreq(int distance)
        {
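            // e.g. an exact sloppy-phrase match (distance 0) contributes 1.0f;
            // a match at distance 1 contributes 0.5f.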
            return 1.0f / (distance + 1);
        }

        /// <summary>
        /// The default implementation returns <code>1</code>.
        /// </summary>
        public override float ScorePayload(int doc, int start, int end, BytesRef payload)
        {
            return 1;
        }

        /// <summary>
        /// Implemented as <code>log(numDocs / (docFreq + 1)) + 1</code>.
        /// </summary>
        public override float Idf(long docFreq, long numDocs)
        {
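            // e.g. with numDocs = 2 and docFreq = 2 (as for "test" in the index
            // built above): ln(2 / 3) + 1 ~= 0.595f.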
            return (float)(Math.Log(numDocs / (double)(docFreq + 1)) + 1.0);
        }

        /// <summary>
        /// True if overlap tokens (tokens with a position increment of zero) are
        /// discounted from the document's length.
        /// </summary>
        protected internal bool discountOverlaps = true;

        /// <summary>
        /// Determines whether overlap tokens (tokens with a position increment
        /// of zero) are ignored when computing the norm. By default this is
        /// <c>true</c>, meaning overlap tokens do not count when computing norms.
        ///
        /// @lucene.experimental
        /// </summary>
        /// <seealso cref="ComputeNorm(FieldInvertState)"/>
        public virtual bool DiscountOverlaps
        {
            get { return discountOverlaps; }
            set { discountOverlaps = value; }
        }

        public override string ToString()
        {
            return "DefaultSimilarity";
        }
    }
}