// blob: 6d1a4d05508838b08e0bffcf2172f10050992815 [file] [log] [blame]
using Lucene.Net.Documents;
using Lucene.Net.Support;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using Assert = Lucene.Net.TestFramework.Assert;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
using Directory = Lucene.Net.Store.Directory;
using Document = Documents.Document;
using Field = Field;
using IBits = Lucene.Net.Util.IBits;
using IndexReader = Lucene.Net.Index.IndexReader;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using NumericDocValuesField = NumericDocValuesField;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
using Term = Lucene.Net.Index.Term;
using TestUtil = Lucene.Net.Util.TestUtil;
[SuppressCodecs("Lucene3x")]
[TestFixture]
public class TestQueryRescorer : LuceneTestCase
{
/// <summary>
/// Builds a searcher over <paramref name="r"/> via <c>NewSearcher</c> and sets its
/// similarity to a new <see cref="DefaultSimilarity"/>.
/// </summary>
/// <param name="r">Reader the returned searcher operates on.</param>
/// <returns>The configured searcher.</returns>
private IndexSearcher GetSearcher(IndexReader r)
{
    IndexSearcher result = NewSearcher(r);

    // The tests depend on documents with more tokens receiving a lower score.
    result.Similarity = new DefaultSimilarity();

    return result;
}
/// <summary>
/// Indexes two documents, runs a first-pass <see cref="BooleanQuery"/>, and checks that
/// rescoring with a sloppy <see cref="PhraseQuery"/> and with an ordered
/// <see cref="SpanNearQuery"/> each swap the order of the two hits.
/// </summary>
[Test]
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);

    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close:
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);

    IndexReader r = w.GetReader();
    w.Dispose();

    try
    {
        // Do ordinary BooleanQuery:
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
        bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

        // GetSearcher already installs DefaultSimilarity, so it is not set again here.
        IndexSearcher searcher = GetSearcher(r);

        TopDocs hits = searcher.Search(bq, 10);
        Assert.AreEqual(2, hits.TotalHits);
        Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

        // Now, resort using PhraseQuery:
        PhraseQuery pq = new PhraseQuery();
        pq.Slop = 5;
        pq.Add(new Term("field", "wizard"));
        pq.Add(new Term("field", "oz"));

        TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);

        // Resorting changed the order:
        Assert.AreEqual(2, hits2.TotalHits);
        Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

        // Resort using SpanNearQuery:
        SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
        SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
        SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);

        TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);

        // Resorting changed the order:
        Assert.AreEqual(2, hits3.TotalHits);
        Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));
    }
    finally
    {
        // Release the reader and directory even when an assertion above throws.
        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Indexes two documents, runs a first-pass <see cref="BooleanQuery"/>, and checks that a
/// rescorer whose <c>Combine</c> subtracts the second-pass score leaves the first-pass
/// order of the two hits unchanged.
/// </summary>
[Test]
public virtual void TestCustomCombine()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);

    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close:
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);

    IndexReader r = w.GetReader();
    w.Dispose();

    try
    {
        // Do ordinary BooleanQuery:
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
        bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
        IndexSearcher searcher = GetSearcher(r);

        TopDocs hits = searcher.Search(bq, 10);
        Assert.AreEqual(2, hits.TotalHits);
        Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

        // Now, resort using PhraseQuery, but with an
        // opposite-world combine:
        PhraseQuery pq = new PhraseQuery();
        pq.Slop = 5;
        pq.Add(new Term("field", "wizard"));
        pq.Add(new Term("field", "oz"));

        TopDocs hits2 = new QueryRescorerAnonymousClass(this, pq)
            .Rescore(searcher, hits, 10);

        // Resorting didn't change the order:
        Assert.AreEqual(2, hits2.TotalHits);
        Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));
    }
    finally
    {
        // Release the reader and directory even when an assertion above throws.
        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// <see cref="QueryRescorer"/> whose combine step subtracts twice the second-pass score
/// from the first-pass score whenever the second-pass query matched.
/// </summary>
private class QueryRescorerAnonymousClass : QueryRescorer
{
    private readonly TestQueryRescorer outerInstance;

    public QueryRescorerAnonymousClass(TestQueryRescorer outerInstance, PhraseQuery pq)
        : base(pq)
    {
        this.outerInstance = outerInstance;
    }

    /// <summary>
    /// Returns <paramref name="firstPassScore"/> unchanged when the second pass did not
    /// match; otherwise returns it reduced by twice <paramref name="secondPassScore"/>.
    /// </summary>
    protected override float Combine(float firstPassScore, bool secondPassMatches, float secondPassScore)
    {
        if (!secondPassMatches)
        {
            return firstPassScore;
        }

        float penalty = (float)(2.0 * secondPassScore);
        return firstPassScore - penalty;
    }
}
/// <summary>
/// Rescores two first-pass hits with an exact <see cref="PhraseQuery"/> and checks the text
/// and value of the rescorer's <see cref="Explanation"/> for both the hit that matches the
/// second pass and the hit that does not.
/// </summary>
[Test]
public virtual void TestExplain()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);

    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close:
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);

    IndexReader r = w.GetReader();
    w.Dispose();

    try
    {
        // Do ordinary BooleanQuery:
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
        bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
        IndexSearcher searcher = GetSearcher(r);

        TopDocs hits = searcher.Search(bq, 10);
        Assert.AreEqual(2, hits.TotalHits);
        Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

        // Now, resort using PhraseQuery:
        PhraseQuery pq = new PhraseQuery();
        pq.Add(new Term("field", "wizard"));
        pq.Add(new Term("field", "oz"));

        Rescorer rescorer = new QueryRescorerAnonymousClass2(this, pq);

        TopDocs hits2 = rescorer.Rescore(searcher, hits, 10);

        // Resorting changed the order:
        Assert.AreEqual(2, hits2.TotalHits);
        Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

        // Top hit after rescoring: the explanation must include a second pass score.
        int docID = hits2.ScoreDocs[0].Doc;
        Explanation explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
        string s = explain.ToString();
        Assert.IsTrue(s.Contains("TestQueryRescorer+"));
        Assert.IsTrue(s.Contains("combined first and second pass score"));
        Assert.IsTrue(s.Contains("first pass score"));
        Assert.IsTrue(s.Contains("= second pass score"));
        Assert.AreEqual(hits2.ScoreDocs[0].Score, explain.Value, 0.0f);

        // Second hit: the explanation must report that there is no second pass score.
        docID = hits2.ScoreDocs[1].Doc;
        explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
        s = explain.ToString();
        Assert.IsTrue(s.Contains("TestQueryRescorer+"));
        Assert.IsTrue(s.Contains("combined first and second pass score"));
        Assert.IsTrue(s.Contains("first pass score"));
        Assert.IsTrue(s.Contains("no second pass score"));
        Assert.IsFalse(s.Contains("= second pass score"));
        Assert.IsTrue(s.Contains("NON-MATCH"));
        Assert.IsTrue(Math.Abs(hits2.ScoreDocs[1].Score - explain.Value) < 0.0000001f);
    }
    finally
    {
        // Release the reader and directory even when an assertion above throws.
        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// <see cref="QueryRescorer"/> whose combine step adds twice the second-pass score to the
/// first-pass score whenever the second-pass query matched.
/// </summary>
private class QueryRescorerAnonymousClass2 : QueryRescorer
{
    private readonly TestQueryRescorer outerInstance;

    public QueryRescorerAnonymousClass2(TestQueryRescorer outerInstance, PhraseQuery pq)
        : base(pq)
    {
        this.outerInstance = outerInstance;
    }

    /// <summary>
    /// Returns <paramref name="firstPassScore"/> unchanged when the second pass did not
    /// match; otherwise returns it increased by twice <paramref name="secondPassScore"/>.
    /// </summary>
    protected override float Combine(float firstPassScore, bool secondPassMatches, float secondPassScore)
    {
        if (!secondPassMatches)
        {
            return firstPassScore;
        }

        float bonus = (float)(2.0 * secondPassScore);
        return firstPassScore + bonus;
    }
}
/// <summary>
/// Indexes two documents, runs a first-pass <see cref="BooleanQuery"/>, and checks that
/// rescoring with a <see cref="PhraseQuery"/> that has no slop set, and with an ordered
/// <see cref="SpanNearQuery"/> of slop 0, each swap the order of the two hits.
/// </summary>
[Test]
public virtual void TestMissingSecondPassScore()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Document doc = new Document();
    doc.Add(NewStringField("id", "0", Field.Store.YES));
    doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.AddDocument(doc);

    doc = new Document();
    doc.Add(NewStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close:
    doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.AddDocument(doc);

    IndexReader r = w.GetReader();
    w.Dispose();

    try
    {
        // Do ordinary BooleanQuery:
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
        bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
        IndexSearcher searcher = GetSearcher(r);

        TopDocs hits = searcher.Search(bq, 10);
        Assert.AreEqual(2, hits.TotalHits);
        Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

        // Now, resort using PhraseQuery, no slop:
        PhraseQuery pq = new PhraseQuery();
        pq.Add(new Term("field", "wizard"));
        pq.Add(new Term("field", "oz"));

        TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);

        // Resorting changed the order:
        Assert.AreEqual(2, hits2.TotalHits);
        Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

        // Resort using SpanNearQuery:
        SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
        SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
        SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);

        TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);

        // Resorting changed the order:
        Assert.AreEqual(2, hits3.TotalHits);
        Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
        Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));
    }
    finally
    {
        // Release the reader and directory even when an assertion above throws.
        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// Indexes a random number of documents, each with a random per-document number, rescores
/// the first-pass hits so that the final score comes only from <c>FixedScoreQuery</c>, and
/// checks that the rescored order equals the first-pass hits sorted by that number
/// (ascending, or descending when <c>reverse</c> is drawn), with ties broken by doc ID.
/// </summary>
[Test]
public virtual void TestRandom()
{
    Directory dir = NewDirectory();
    int numDocs = AtLeast(1000);
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    int[] idToNum = new int[numDocs];
    int maxValue = TestUtil.NextInt32(Random, 10, 1000000);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        // The id is parsed back below, so it is written with the invariant culture.
        doc.Add(NewStringField("id", i.ToString(CultureInfo.InvariantCulture), Field.Store.YES));
        int numTokens = TestUtil.NextInt32(Random, 1, 10);
        StringBuilder b = new StringBuilder();
        for (int j = 0; j < numTokens; j++)
        {
            b.Append("a ");
        }
        doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
        idToNum[i] = Random.Next(maxValue);
        doc.Add(new NumericDocValuesField("num", idToNum[i]));
        w.AddDocument(doc);
    }

    IndexReader r = w.GetReader();
    w.Dispose();

    try
    {
        IndexSearcher s = NewSearcher(r);
        int numHits = TestUtil.NextInt32(Random, 1, numDocs);
        bool reverse = Random.NextBoolean();

        TopDocs hits = s.Search(new TermQuery(new Term("field", "a")), numHits);

        TopDocs hits2 = new QueryRescorerAnonymousClass3(this, new FixedScoreQuery(idToNum, reverse))
            .Rescore(s, hits, numHits);

        // Resolve each hit's number once, so the comparator below does not have to
        // reload the stored document on every comparison.
        int[] expected = new int[numHits];
        Dictionary<int, int> numByDoc = new Dictionary<int, int>(numHits);
        for (int i = 0; i < numHits; i++)
        {
            int docID = hits.ScoreDocs[i].Doc;
            expected[i] = docID;
            string id = r.Document(docID).Get("id");
            numByDoc[docID] = idToNum[Convert.ToInt32(id, CultureInfo.InvariantCulture)];
        }

        Array.Sort(expected,
            Comparer<int>.Create((left, right) =>
            {
                int byNum = numByDoc[left].CompareTo(numByDoc[right]);
                if (byNum != 0)
                {
                    // Ascending by number normally, descending when reverse is set.
                    return reverse ? -byNum : byNum;
                }

                // Tie break by docID, ascending
                return left.CompareTo(right);
            })
        );

        // Compare rank by rank so a failure reports the mismatching doc IDs.
        for (int i = 0; i < numHits; i++)
        {
            Assert.AreEqual(expected[i], hits2.ScoreDocs[i].Doc);
        }
    }
    finally
    {
        // Release the reader and directory even when an assertion above throws.
        r.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// <see cref="QueryRescorer"/> whose combined score is always the second-pass score; the
/// first-pass score and the match flag are ignored.
/// </summary>
private class QueryRescorerAnonymousClass3 : QueryRescorer
{
    private readonly TestQueryRescorer outerInstance;

    public QueryRescorerAnonymousClass3(TestQueryRescorer outerInstance, FixedScoreQuery fixedScoreQuery)
        : base(fixedScoreQuery)
    {
        this.outerInstance = outerInstance;
    }

    /// <summary>Returns <paramref name="secondPassScore"/> as the combined score.</summary>
    protected override float Combine(float firstPassScore, bool secondPassMatches, float secondPassScore)
        => secondPassScore;
}
/// <summary>
/// Test-only query whose scorer steps through every doc ID of a segment and scores each
/// document from <c>idToNum</c>, indexed by the document's stored "id" field: the number
/// itself when <c>reverse</c> is true, its negation otherwise.
/// </summary>
private class FixedScoreQuery : Query
{
    private readonly int[] idToNum;
    private readonly bool reverse;

    /// <param name="idToNum">Number assigned to each document, indexed by the parsed "id" field.</param>
    /// <param name="reverse">When true the score is the number; when false it is the negated number.</param>
    public FixedScoreQuery(int[] idToNum, bool reverse)
    {
        this.idToNum = idToNum;
        this.reverse = reverse;
    }

    public override Weight CreateWeight(IndexSearcher searcher)
    {
        return new WeightAnonymousClass(this);
    }

    /// <summary>Weight that does no normalization work and creates the fixed-score scorer.</summary>
    private class WeightAnonymousClass : Weight
    {
        private readonly FixedScoreQuery outerInstance;

        public WeightAnonymousClass(FixedScoreQuery outerInstance)
        {
            this.outerInstance = outerInstance;
        }

        public override Query Query => outerInstance;

        public override float GetValueForNormalization()
        {
            return 1.0f;
        }

        public override void Normalize(float queryNorm, float topLevelBoost)
        {
            // Intentionally empty: the score does not depend on the normalization values.
        }

        public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
        {
            // acceptDocs is not consulted: every doc ID below MaxDoc is visited.
            return new ScorerAnonymousClass(this, context);
        }

        /// <summary>Scorer that iterates doc IDs 0..MaxDoc-1 of one segment.</summary>
        private class ScorerAnonymousClass : Scorer
        {
            private readonly WeightAnonymousClass outerInstance;
            private readonly AtomicReaderContext context;

            public ScorerAnonymousClass(WeightAnonymousClass outerInstance, AtomicReaderContext context)
                : base(null)
            {
                this.outerInstance = outerInstance;
                this.context = context;
                docID = -1;
            }

            internal int docID;

            public override int DocID => docID;

            public override int Freq => 1;

            public override long GetCost()
            {
                return 1;
            }

            public override int NextDoc()
            {
                // Once exhausted, stay exhausted instead of incrementing past NO_MORE_DOCS.
                if (docID == NO_MORE_DOCS)
                {
                    return NO_MORE_DOCS;
                }

                return Advance(docID + 1);
            }

            public override int Advance(int target)
            {
                // A target at or beyond MaxDoc exhausts the iterator; DocID then reports
                // NO_MORE_DOCS as well, keeping it in agreement with the returned value.
                docID = target >= context.Reader.MaxDoc ? NO_MORE_DOCS : target;
                return docID;
            }

            public override float GetScore()
            {
                FixedScoreQuery query = outerInstance.outerInstance;

                // The stored id is machine-readable, so parse it culture-independently.
                string id = context.Reader.Document(docID).Get("id");
                int num = query.idToNum[Convert.ToInt32(id, CultureInfo.InvariantCulture)];

                return query.reverse ? num : -num;
            }
        }

        public override Explanation Explain(AtomicReaderContext context, int doc)
        {
            return null;
        }
    }

    public override void ExtractTerms(ISet<Term> terms)
    {
        // Intentionally empty: nothing is added to the set.
    }

    public override string ToString(string field)
    {
        return "FixedScoreQuery " + idToNum.Length + " ids; reverse=" + reverse;
    }

    /// <summary>
    /// Two instances are equal when their boost bits, <c>reverse</c> flag and
    /// <c>idToNum</c> contents are all equal.
    /// </summary>
    public override bool Equals(object o)
    {
        FixedScoreQuery other = o as FixedScoreQuery;
        if (other == null)
        {
            return false;
        }

        return J2N.BitConversion.SingleToInt32Bits(Boost) == J2N.BitConversion.SingleToInt32Bits(other.Boost)
            && reverse == other.reverse
            && Arrays.Equals(idToNum, other.idToNum);
    }

    public override object Clone()
    {
        // Carry the boost over: Equals compares it, so a clone that dropped it would not
        // be equal to the instance it was cloned from.
        return new FixedScoreQuery(idToNum, reverse) { Boost = Boost };
    }

    public override int GetHashCode()
    {
        const int Prime = 31;

        // Hash mixing is expected to wrap around on overflow.
        unchecked
        {
            int hash = base.GetHashCode();
            if (reverse)
            {
                hash = Prime * hash + 3623;
            }
            hash = Prime * hash + Arrays.GetHashCode(idToNum);
            return hash;
        }
    }
}
}
}