using J2N.Collections.Generic.Extensions;
using Lucene.Net.Diagnostics;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
using System.Linq;
using Assert = Lucene.Net.TestFramework.Assert;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search
using AtomicReader = Lucene.Net.Index.AtomicReader;
using BooleanWeight = Lucene.Net.Search.BooleanQuery.BooleanWeight;
using BytesRef = Lucene.Net.Util.BytesRef;
using DefaultSimilarity = Lucene.Net.Search.Similarities.DefaultSimilarity;
using Directory = Lucene.Net.Store.Directory;
using DirectoryReader = Lucene.Net.Index.DirectoryReader;
using Document = Documents.Document;
using Field = Field;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using SimScorer = Lucene.Net.Search.Similarities.Similarity.SimScorer;
using SimWeight = Lucene.Net.Search.Similarities.Similarity.SimWeight;
using SortedSetDocValues = Lucene.Net.Index.SortedSetDocValues;
using SortedSetDocValuesField = SortedSetDocValuesField;
using StringField = StringField;
using Term = Lucene.Net.Index.Term;
using TermContext = Lucene.Net.Index.TermContext;
using TestUtil = Lucene.Net.Util.TestUtil;
/// <summary>
/// tests BooleanScorer2's minShouldMatch </summary>
[SuppressCodecs("Appending", "Lucene3x", "Lucene40", "Lucene41")]
public class TestMinShouldMatch2 : LuceneTestCase
// Shared index fixtures: assigned in BeforeClass and reset to null in AfterClass.
internal static Directory dir;
internal static DirectoryReader r;
internal static AtomicReader atomicReader;
internal static IndexSearcher searcher;
// Term pools used by BeforeClass when building documents:
// terms from alwaysTerms are added to every document.
internal static readonly string[] alwaysTerms = new string[] { "a" };
// terms from commonTerms are added when Random.Next(100) < 90.
internal static readonly string[] commonTerms = new string[] { "b", "c", "d" };
// terms from mediumTerms are added when Random.Next(100) < 50.
internal static readonly string[] mediumTerms = new string[] { "e", "f", "g" };
// terms from rareTerms are added when Random.Next(100) < 10.
internal static readonly string[] rareTerms = new string[] { "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
/// <summary>
/// Builds the shared index used by every test in this fixture: at least 300
/// documents, each holding the "always" terms plus a random selection of the
/// common (90%), medium (50%) and rare (10%) term pools, then opens a reader
/// and searcher over it.
/// <para/>
/// LUCENENET specific
/// Is non-static because Similarity and TimeZone are not static.
/// </summary>
[OneTimeSetUp]
public override void BeforeClass()
{
    base.BeforeClass();

    dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir);
    int numDocs = AtLeast(300);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();

        AddSome(doc, alwaysTerms);

        if (Random.Next(100) < 90)
        {
            AddSome(doc, commonTerms);
        }
        if (Random.Next(100) < 50)
        {
            AddSome(doc, mediumTerms);
        }
        if (Random.Next(100) < 10)
        {
            AddSome(doc, rareTerms);
        }

        // Without this the documents built above never reach the index.
        iw.AddDocument(doc);
    }

    // GetOnlySegmentReader below requires exactly one segment, and the writer
    // must be disposed so the reader sees the written documents.
    iw.ForceMerge(1);
    iw.Dispose();

    r = DirectoryReader.Open(dir);
    atomicReader = GetOnlySegmentReader(r);
    searcher = new IndexSearcher(atomicReader);
    searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper();
}
/// <summary>
/// <see cref="DefaultSimilarity"/> variant whose query normalization factor is always 1.
/// </summary>
private class DefaultSimilarityAnonymousInnerClassHelper : DefaultSimilarity
{
    public DefaultSimilarityAnonymousInnerClassHelper()
        : base()
    {
    }

    // we disable queryNorm, both for debugging and ease of impl
    public override float QueryNorm(float sumOfSquaredWeights) => 1f;
}
/// <summary>
/// Releases the shared index fixtures created by <see cref="BeforeClass"/> and
/// clears the static references to them.
/// </summary>
[OneTimeTearDown]
public override void AfterClass()
{
    // Dispose before clearing the references; otherwise the reader and the
    // directory are leaked. Null-conditional so a failed setup does not mask
    // the original error with a NullReferenceException here.
    atomicReader?.Dispose();
    dir?.Dispose();

    searcher = null;
    atomicReader = null;
    r = null;
    dir = null;

    base.AfterClass();
}
/// <summary>
/// Adds a random, non-empty selection of <paramref name="values"/> to
/// <paramref name="doc"/>. Each chosen value is indexed in the "field" string
/// field and mirrored in the "dv" sorted-set docvalues field.
/// </summary>
/// <param name="doc">Document to add the fields to.</param>
/// <param name="values">Pool of candidate terms; not modified.</param>
private static void AddSome(Document doc, string[] values)
{
    // Work on a copy so the shared static term arrays keep their order.
    IList<string> list = values.ToArray();

    // Shuffle before taking the first `howMany` entries; otherwise the same
    // leading terms are always chosen and trailing terms are rarely indexed.
    list.Shuffle(Random);

    int howMany = TestUtil.NextInt32(Random, 1, list.Count);
    for (int i = 0; i < howMany; i++)
    {
        doc.Add(new StringField("field", list[i], Field.Store.NO));
        doc.Add(new SortedSetDocValuesField("dv", new BytesRef(list[i])));
    }
}
/// <summary>
/// Builds a pure-SHOULD <see cref="BooleanQuery"/> over "field" for the given
/// terms and returns a scorer for it.
/// </summary>
/// <param name="values">Terms to add as optional clauses.</param>
/// <param name="minShouldMatch">Minimum number of optional clauses that must match.</param>
/// <param name="slow">
/// <c>true</c> to get the <see cref="SlowMinShouldMatchScorer"/>;
/// <c>false</c> to get the scorer produced by the query's own weight.
/// </param>
private Scorer Scorer(string[] values, int minShouldMatch, bool slow)
{
    BooleanQuery query = new BooleanQuery();
    query.MinimumNumberShouldMatch = minShouldMatch;
    for (int i = 0; i < values.Length; i++)
    {
        Term term = new Term("field", values[i]);
        query.Add(new TermQuery(term), Occur.SHOULD);
    }

    BooleanWeight booleanWeight = (BooleanWeight)searcher.CreateNormalizedWeight(query);
    return slow
        ? new SlowMinShouldMatchScorer(booleanWeight, atomicReader, searcher)
        : booleanWeight.GetScorer((AtomicReaderContext)atomicReader.Context, null);
}
/// <summary>
/// Steps both scorers with NextDoc() and asserts that they visit the same
/// documents with the same frequency and (within tolerance) the same score,
/// and that both are exhausted together.
/// </summary>
/// <param name="expected">Reference scorer.</param>
/// <param name="actual">Scorer under test; may be <c>null</c>, in which case
/// <paramref name="expected"/> must have no matches.</param>
private void AssertNext(Scorer expected, Scorer actual)
{
    if (actual == null)
    {
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, expected.NextDoc());
        // Nothing more to compare; continuing would dereference the null scorer.
        return;
    }

    int doc;
    while ((doc = expected.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        Assert.AreEqual(doc, actual.NextDoc());
        Assert.AreEqual(expected.Freq, actual.Freq);
        float expectedScore = expected.GetScore();
        float actualScore = actual.GetScore();
        Assert.AreEqual(expectedScore, actualScore, CheckHits.ExplainToleranceDelta(expectedScore, actualScore));
    }
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, actual.NextDoc());
}
/// <summary>
/// Steps both scorers with Advance(previousDoc + amount) and asserts that they
/// land on the same documents with the same frequency and (within tolerance)
/// the same score, and that both are exhausted together.
/// </summary>
/// <param name="expected">Reference scorer.</param>
/// <param name="actual">Scorer under test; may be <c>null</c>, in which case
/// <paramref name="expected"/> must have no matches.</param>
/// <param name="amount">Distance added to the previous document id for each advance.</param>
private void AssertAdvance(Scorer expected, Scorer actual, int amount)
{
    if (actual == null)
    {
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, expected.NextDoc());
        // Nothing more to compare; continuing would dereference the null scorer.
        return;
    }

    int prevDoc = 0;
    int doc;
    while ((doc = expected.Advance(prevDoc + amount)) != DocIdSetIterator.NO_MORE_DOCS)
    {
        Assert.AreEqual(doc, actual.Advance(prevDoc + amount));
        Assert.AreEqual(expected.Freq, actual.Freq);
        float expectedScore = expected.GetScore();
        float actualScore = actual.GetScore();
        Assert.AreEqual(expectedScore, actualScore, CheckHits.ExplainToleranceDelta(expectedScore, actualScore));
        prevDoc = doc;
    }
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, actual.Advance(prevDoc + amount));
}
/// <summary>
/// simple test for next(): minShouldMatch=2 on 3 terms (one common, one medium, one rare)
/// </summary>
[Test] // required for NUnit to discover and run this method
public virtual void TestNextCMR2()
{
    foreach (string common in commonTerms)
    {
        foreach (string medium in mediumTerms)
        {
            foreach (string rare in rareTerms)
            {
                // Scorer(...) only reads the array, so one instance serves both calls.
                string[] terms = new string[] { common, medium, rare };
                Scorer expected = Scorer(terms, 2, true);
                Scorer actual = Scorer(terms, 2, false);
                AssertNext(expected, actual);
            }
        }
    }
}
/// <summary>
/// simple test for advance(): minShouldMatch=2 on 3 terms (one common, one medium, one rare)
/// </summary>
[Test] // required for NUnit to discover and run this method
public virtual void TestAdvanceCMR2()
{
    for (int amount = 25; amount < 200; amount += 25)
    {
        foreach (string common in commonTerms)
        {
            foreach (string medium in mediumTerms)
            {
                foreach (string rare in rareTerms)
                {
                    // Scorer(...) only reads the array, so one instance serves both calls.
                    string[] terms = new string[] { common, medium, rare };
                    Scorer expected = Scorer(terms, 2, true);
                    Scorer actual = Scorer(terms, 2, false);
                    AssertAdvance(expected, actual, amount);
                }
            }
        }
    }
}
/// <summary>
/// test next with giant bq of all terms with varying minShouldMatch
/// </summary>
[Test] // required for NUnit to discover and run this method
public virtual void TestNextAllTerms()
{
    // Actually collect the terms: a list created with only a capacity is empty,
    // which would make the loop below run zero times and assert nothing.
    string[] terms = commonTerms.Concat(mediumTerms).Concat(rareTerms).ToArray();

    for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.Length; minNrShouldMatch++)
    {
        Scorer expected = Scorer(terms, minNrShouldMatch, true);
        Scorer actual = Scorer(terms, minNrShouldMatch, false);
        AssertNext(expected, actual);
    }
}
/// <summary>
/// test advance with giant bq of all terms with varying minShouldMatch
/// </summary>
[Test] // required for NUnit to discover and run this method
public virtual void TestAdvanceAllTerms()
{
    // Actually collect the terms: a list created with only a capacity is empty,
    // which would make the loops below run zero times and assert nothing.
    string[] terms = commonTerms.Concat(mediumTerms).Concat(rareTerms).ToArray();

    for (int amount = 25; amount < 200; amount += 25)
    {
        for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.Length; minNrShouldMatch++)
        {
            Scorer expected = Scorer(terms, minNrShouldMatch, true);
            Scorer actual = Scorer(terms, minNrShouldMatch, false);
            AssertAdvance(expected, actual, amount);
        }
    }
}
/// <summary>
/// test next with varying numbers of terms with varying minShouldMatch
/// </summary>
[Test] // required for NUnit to discover and run this method
public virtual void TestNextVaryingNumberOfTerms()
{
    // Populate the list: one created with only a capacity has Count == 0,
    // so the loop below would never execute.
    IList<string> termsList = commonTerms.Concat(mediumTerms).Concat(rareTerms).ToList();
    // Shuffle so each prefix is a random mix rather than always starting with the common terms.
    termsList.Shuffle(Random);

    for (int numTerms = 2; numTerms <= termsList.Count; numTerms++)
    {
        string[] terms = termsList.SubList(0, numTerms).ToArray(/*new string[0]*/);
        for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.Length; minNrShouldMatch++)
        {
            Scorer expected = Scorer(terms, minNrShouldMatch, true);
            Scorer actual = Scorer(terms, minNrShouldMatch, false);
            AssertNext(expected, actual);
        }
    }
}
/// <summary>
/// test advance with varying numbers of terms with varying minShouldMatch
/// </summary>
[Test] // required for NUnit to discover and run this method
public virtual void TestAdvanceVaryingNumberOfTerms()
{
    // Populate the list: one created with only a capacity has Count == 0,
    // so the loops below would never execute.
    IList<string> termsList = commonTerms.Concat(mediumTerms).Concat(rareTerms).ToList();
    // Shuffle so each prefix is a random mix rather than always starting with the common terms.
    termsList.Shuffle(Random);

    for (int amount = 25; amount < 200; amount += 25)
    {
        for (int numTerms = 2; numTerms <= termsList.Count; numTerms++)
        {
            string[] terms = termsList.SubList(0, numTerms).ToArray(/*new string[0]*/);
            for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.Length; minNrShouldMatch++)
            {
                Scorer expected = Scorer(terms, minNrShouldMatch, true);
                Scorer actual = Scorer(terms, minNrShouldMatch, false);
                AssertAdvance(expected, actual, amount);
            }
        }
    }
}
// TODO: more tests
// a slow min-should match scorer that uses a docvalues field.
// later, we can make debugging easier as it can record the set of ords it currently matched
// and e.g. print out their values and so on for the document
/// <summary>
/// Reference scorer: walks every document in order, reads its ordinals from the
/// "dv" sorted-set docvalues field and counts how many belong to the query's
/// terms. A document matches once that count reaches the query's
/// MinimumNumberShouldMatch.
/// </summary>
internal class SlowMinShouldMatchScorer : Scorer
{
    internal int currentDoc = -1; // current docid
    internal int currentMatched = -1; // current number of terms matched

    internal readonly SortedSetDocValues dv;
    internal readonly int maxDoc;

    // ordinals (in dv) of the query terms that exist in the index
    internal readonly ISet<long?> ords = new JCG.HashSet<long?>();
    // per-ordinal similarity scorer; only slots whose ordinal is in ords are populated
    internal readonly SimScorer[] sims;
    internal readonly int minNrShouldMatch;

    // sum of the matched terms' scores for the current document (before coord)
    internal double score = float.NaN;

    /// <summary>
    /// Resolves each clause's term to its ordinal in the "dv" field and prepares
    /// a <see cref="SimScorer"/> for every term that is present.
    /// </summary>
    /// <param name="weight">Weight of a <see cref="BooleanQuery"/> whose clauses are all optional <see cref="TermQuery"/>s.</param>
    /// <param name="reader">Reader supplying the docvalues and the reader context.</param>
    /// <param name="searcher">Searcher supplying collection and term statistics.</param>
    internal SlowMinShouldMatchScorer(BooleanWeight weight, AtomicReader reader, IndexSearcher searcher)
        : base(weight)
    {
        this.dv = reader.GetSortedSetDocValues("dv");
        this.maxDoc = reader.MaxDoc;
        BooleanQuery bq = (BooleanQuery)weight.Query;
        this.minNrShouldMatch = bq.MinimumNumberShouldMatch;
        this.sims = new SimScorer[(int)dv.ValueCount];
        foreach (BooleanClause clause in bq.GetClauses())
        {
            if (Debugging.AssertsEnabled) Debugging.Assert(!clause.IsProhibited);
            if (Debugging.AssertsEnabled) Debugging.Assert(!clause.IsRequired);
            Term term = ((TermQuery)clause.Query).Term;
            long ord = dv.LookupTerm(term.Bytes);
            if (ord >= 0)
            {
                bool success = ords.Add(ord);
                if (Debugging.AssertsEnabled) Debugging.Assert(success); // no dups
                TermContext context = TermContext.Build(reader.Context, term);
                SimWeight w = weight.Similarity.ComputeWeight(1f, searcher.CollectionStatistics("field"), searcher.TermStatistics(term, context));
                w.GetValueForNormalization(); // ignored
                w.Normalize(1F, 1F);
                sims[(int)ord] = weight.Similarity.GetSimScorer(w, (AtomicReaderContext)reader.Context);
            }
        }
    }

    /// <summary>
    /// Score of the current document: the summed term scores multiplied by the
    /// weight's coord factor for the number of matched terms.
    /// </summary>
    public override float GetScore()
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(score != 0, currentMatched.ToString());
        BooleanWeight booleanWeight = (BooleanWeight)m_weight;
        return (float)score * booleanWeight.Coord(currentMatched, booleanWeight.MaxCoord);
    }

    /// <summary>Number of query terms matched by the current document.</summary>
    public override int Freq => currentMatched;

    public override int DocID => currentDoc;

    /// <summary>
    /// Moves to the next document that matches at least
    /// <see cref="minNrShouldMatch"/> query terms.
    /// </summary>
    /// <returns>The matching document id, or NO_MORE_DOCS when exhausted.</returns>
    public override int NextDoc()
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(currentDoc != NO_MORE_DOCS);
        for (currentDoc = currentDoc + 1; currentDoc < maxDoc; currentDoc++)
        {
            currentMatched = 0;
            score = 0;
            // Position the docvalues on this document before reading its ordinals.
            dv.SetDocument(currentDoc);
            long ord;
            while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
            {
                if (ords.Contains(ord))
                {
                    // Count the match; without this the threshold below can never be met.
                    currentMatched++;
                    score += sims[(int)ord].Score(currentDoc, 1);
                }
            }
            if (currentMatched >= minNrShouldMatch)
            {
                return currentDoc;
            }
        }
        return currentDoc = NO_MORE_DOCS;
    }

    /// <summary>
    /// Advances by repeatedly calling <see cref="NextDoc"/> until a match at or
    /// beyond <paramref name="target"/> is found.
    /// </summary>
    /// <returns>The matching document id, or NO_MORE_DOCS when exhausted.</returns>
    public override int Advance(int target)
    {
        int doc;
        while ((doc = NextDoc()) < target)
        {
            // intentionally empty: keep stepping until we reach the target
        }
        return doc;
    }

    public override long GetCost()
    {
        return maxDoc;
    }
}