using System;
using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Queries;
using Lucene.Net.Randomized.Generators;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
namespace Lucene.Net.Tests.Queries
{
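// Tests for CommonTermsQuery, which splits its terms into high-frequency and
// low-frequency groups based on document frequency relative to the maxTermFrequency
// cutoff and applies the configured Occur (MUST/SHOULD) to each group.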
public class CommonTermsQueryTest : LuceneTestCase
{
[Test]
public void TestBasics()
{
Directory dir = NewDirectory();
MockAnalyzer analyzer = new MockAnalyzer(Random());
RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
var docs = new string[]
{
@"this is the end of the world right", @"is this it or maybe not",
@"this is the end of the universe as we know it",
@"there is the famous restaurant at the end of the universe"
};
for (int i = 0; i < docs.Length; i++)
{
Document doc = new Document();
doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
w.AddDocument(doc);
}
IndexReader r = w.Reader;
IndexSearcher s = NewSearcher(r);
{
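// The cutoff is either an absolute docFreq of 2 or a fraction 0.5 of the 4 docs (also 2),
// so both random choices produce the same high/low split. Docs 0, 2 and 3 each contain at
// least one low-frequency term (world/right/universe); doc 1 contains none and is excluded.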
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 3);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 2);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 1);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "restaurant"));
query.Add(new Term("field", "universe"));
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 1);
assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
}
r.Dispose();
w.Dispose();
dir.Dispose();
}
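// Equals/hashCode contract: two queries built from the same random seed (same occurs,
// cutoff, terms and minimum-should-match settings) must compare equal; a query built
// with different random settings must not.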
[Test]
public void TestEqualsHashCode()
{
CommonTermsQuery query = new CommonTermsQuery(RandomOccur(Random()), RandomOccur(Random()), Random().NextFloat(), Random().NextBoolean());
int terms = AtLeast(2);
for (int i = 0; i < terms; i++)
{
query.Add(new Term(TestUtil.RandomRealisticUnicodeString(Random()), TestUtil.RandomRealisticUnicodeString(Random())));
}
QueryUtils.CheckHashEquals(query);
QueryUtils.CheckUnequal(new CommonTermsQuery(RandomOccur(Random()), RandomOccur(Random()), Random().NextFloat(), Random().NextBoolean()), query);
{
long seed = Random().NextLong();
Random r = new Random((int)seed);
CommonTermsQuery left = new CommonTermsQuery(RandomOccur(r), RandomOccur(r), r.NextFloat(), r.NextBoolean());
int leftTerms = AtLeast(r, 2);
for (int i = 0; i < leftTerms; i++)
{
left.Add(new Term(TestUtil.RandomRealisticUnicodeString(r), TestUtil.RandomRealisticUnicodeString(r)));
}
left.HighFreqMinimumNumberShouldMatch = r.Next(4);
left.LowFreqMinimumNumberShouldMatch = r.Next(4);
r = new Random((int)seed);
CommonTermsQuery right = new CommonTermsQuery(RandomOccur(r), RandomOccur(r), r.NextFloat(), r.NextBoolean());
int rightTerms = AtLeast(r, 2);
for (int i = 0; i < rightTerms; i++)
{
right.Add(new Term(TestUtil.RandomRealisticUnicodeString(r), TestUtil.RandomRealisticUnicodeString(r)));
}
right.HighFreqMinimumNumberShouldMatch = r.Next(4);
right.LowFreqMinimumNumberShouldMatch = r.Next(4);
QueryUtils.CheckEqual(left, right);
}
}
private static BooleanClause.Occur RandomOccur(Random random)
{
return random.NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
}
[Test]
public void TestNullTerm()
{
Random random = Random();
CommonTermsQuery query = new CommonTermsQuery(RandomOccur(random), RandomOccur(random), Random().NextFloat());
try
{
query.Add(null);
Fail(@"null values are not supported");
}
catch (ArgumentException)
{
// expected: Add(null) must throw
}
}
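// Minimum-should-match values below 1 are treated as a fraction of the clauses in that
// frequency group (rounded); values of 1 or more are absolute counts.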
[Test]
public void TestMinShouldMatch()
{
Directory dir = NewDirectory();
MockAnalyzer analyzer = new MockAnalyzer(Random());
RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
string[] docs = new string[]
{
@"this is the end of the world right", @"is this it or maybe not",
@"this is the end of the universe as we know it",
@"there is the famous restaurant at the end of the universe"
};
for (int i = 0; i < docs.Length; i++)
{
Document doc = new Document();
doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
w.AddDocument(doc);
}
IndexReader r = w.Reader;
IndexSearcher s = NewSearcher(r);
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
query.LowFreqMinimumNumberShouldMatch = 0.5F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 1);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
query.LowFreqMinimumNumberShouldMatch = 2F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 1);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
query.LowFreqMinimumNumberShouldMatch = 0.49F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 3);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
query.LowFreqMinimumNumberShouldMatch = 1F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 3);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score);
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
query.LowFreqMinimumNumberShouldMatch = 1F;
query.HighFreqMinimumNumberShouldMatch = 4F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 3);
assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0F);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(new HashSet<string>(Arrays.AsList(@"2", @"3")), new HashSet<string>(Arrays.AsList(r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id"))));
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "the"));
query.LowFreqMinimumNumberShouldMatch = 1F;
query.HighFreqMinimumNumberShouldMatch = 2F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 4);
}
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "the"));
query.LowFreqMinimumNumberShouldMatch = 1F;
query.HighFreqMinimumNumberShouldMatch = 2F;
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 2);
assertEquals(new HashSet<string>(Arrays.AsList(@"0", @"2")), new HashSet<string>(Arrays.AsList(r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id"))));
}
r.Dispose();
w.Dispose();
dir.Dispose();
}
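// MUST_NOT is meaningless for either frequency group, so the constructor is expected to reject it.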
[Test]
public void TestIllegalOccur()
{
Random random = Random();
try
{
new CommonTermsQuery(BooleanClause.Occur.MUST_NOT, RandomOccur(random), Random().NextFloat());
Fail(@"MUST_NOT is not supported");
}
catch (ArgumentException)
{
// expected
}
try
{
new CommonTermsQuery(RandomOccur(random), BooleanClause.Occur.MUST_NOT, Random().NextFloat());
Fail(@"MUST_NOT is not supported");
}
catch (ArgumentException)
{
// expected
}
}
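// NewTermQuery is the extension point: the ExtendedCommonTermsQuery subclass below boosts
// the term "universe", which reorders the hits compared to the plain query in the first block.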
[Test]
public void TestExtend()
{
Directory dir = NewDirectory();
MockAnalyzer analyzer = new MockAnalyzer(Random());
RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
var docs = new string[]
{
@"this is the end of the world right", @"is this it or maybe not",
@"this is the end of the universe as we know it",
@"there is the famous restaurant at the end of the universe"
};
for (int i = 0; i < docs.Length; i++)
{
Document doc = new Document();
doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
w.AddDocument(doc);
}
IndexReader r = w.Reader;
IndexSearcher s = NewSearcher(r);
{
CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 3);
assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
}
{
CommonTermsQuery query = new ExtendedCommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
query.Add(new Term("field", "is"));
query.Add(new Term("field", "this"));
query.Add(new Term("field", "end"));
query.Add(new Term("field", "world"));
query.Add(new Term("field", "universe"));
query.Add(new Term("field", "right"));
TopDocs search = s.Search(query, 10);
assertEquals(search.TotalHits, 3);
assertEquals(@"2", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
assertEquals(@"3", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
assertEquals(@"0", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
}
r.Dispose();
w.Dispose();
dir.Dispose();
}
/*
* LUCENENET TODO requires a better comparator implementation for PriorityQueue
[Test]
public void TestRandomIndex()
{
Directory dir = NewDirectory();
MockAnalyzer analyzer = new MockAnalyzer(Random());
analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer);
CreateRandomIndex(AtLeast(50), w, Random().NextLong());
DirectoryReader reader = w.Reader;
AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader);
string field = @"body";
Terms terms = wrapper.Terms(field);
var lowFreqQueue = new AnonymousPriorityQueue(this, 5);
Util.PriorityQueue<TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
try
{
TermsEnum iterator = terms.Iterator(null);
while (iterator.Next() != null)
{
if (highFreqQueue.Size() < 5)
{
highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term()), iterator.DocFreq()));
lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term()), iterator.DocFreq()));
}
else
{
if (highFreqQueue.Top().freq < iterator.DocFreq())
{
highFreqQueue.Top().freq = iterator.DocFreq();
highFreqQueue.Top().term = BytesRef.DeepCopyOf(iterator.Term());
highFreqQueue.UpdateTop();
}
if (lowFreqQueue.Top().freq > iterator.DocFreq())
{
lowFreqQueue.Top().freq = iterator.DocFreq();
lowFreqQueue.Top().term = BytesRef.DeepCopyOf(iterator.Term());
lowFreqQueue.UpdateTop();
}
}
}
int lowFreq = lowFreqQueue.Top().freq;
int highFreq = highFreqQueue.Top().freq;
AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
List<TermAndFreq> highTerms = QueueToList(highFreqQueue);
List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue);
IndexSearcher searcher = NewSearcher(reader);
BooleanClause.Occur lowFreqOccur = RandomOccur(Random());
BooleanQuery verifyQuery = new BooleanQuery();
CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random()), lowFreqOccur, highFreq - 1, Random().NextBoolean());
foreach (TermAndFreq termAndFreq in lowTerms)
{
cq.Add(new Term(field, termAndFreq.term));
verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
}
foreach (TermAndFreq termAndFreq in highTerms)
{
cq.Add(new Term(field, termAndFreq.term));
}
TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);
TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
var hits = new HashSet<int>();
foreach (ScoreDoc doc in verifySearch.ScoreDocs)
{
hits.Add(doc.Doc);
}
foreach (ScoreDoc doc in cqSearch.ScoreDocs)
{
assertTrue(hits.Remove(doc.Doc));
}
assertTrue(hits.IsEmpty());
w.ForceMerge(1);
DirectoryReader reader2 = w.Reader;
QueryUtils.Check(Random(), cq, NewSearcher(reader2));
reader2.Dispose();
}
finally
{
reader.Dispose();
wrapper.Dispose();
w.Dispose();
dir.Dispose();
}
}
private sealed class AnonymousPriorityQueue : Support.PriorityQueue<TermAndFreq>
{
public AnonymousPriorityQueue(CommonTermsQueryTest parent)
{
this.parent = parent;
}
private readonly CommonTermsQueryTest parent;
protected override bool LessThan(TermAndFreq a, TermAndFreq b)
{
return a.freq > b.freq;
}
}
private sealed class AnonymousPriorityQueue1 : Support.PriorityQueue<TermAndFreq>
{
public AnonymousPriorityQueue1(CommonTermsQueryTest parent)
{
this.parent = parent;
}
private readonly CommonTermsQueryTest parent;
protected override bool LessThan(TermAndFreq a, TermAndFreq b)
{
return a.freq < b.freq;
}
}*/
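// QueueToList and TermAndFreq are only referenced by the commented-out TestRandomIndex above.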
private static List<TermAndFreq> QueueToList(Util.PriorityQueue<TermAndFreq> queue)
{
var terms = new List<TermAndFreq>();
while (queue.Size() > 0)
{
terms.Add(queue.Pop());
}
return terms;
}
private class TermAndFreq : IComparable<TermAndFreq>
{
public BytesRef term;
public int freq;
public TermAndFreq(BytesRef term, int freq)
{
this.term = term;
this.freq = freq;
}
public int CompareTo(TermAndFreq other)
{
// order by frequency first, then by term, so the comparison is a consistent total ordering
int cmp = freq.CompareTo(other.freq);
return cmp != 0 ? cmp : term.CompareTo(other.term);
}
}
public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed)
{
Random random = new Random((int)seed);
// primary source for our data is LineFileDocs; it's realistic.
LineFileDocs lineFileDocs = new LineFileDocs(random, false); // no docvalues in 4x
for (int i = 0; i < numdocs; i++)
{
writer.AddDocument(lineFileDocs.NextDoc());
}
lineFileDocs.Dispose();
}
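// Overrides NewTermQuery to boost the term "universe"; TestExtend uses this to verify
// that subclasses can influence per-term scoring.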
private sealed class ExtendedCommonTermsQuery : CommonTermsQuery
{
public ExtendedCommonTermsQuery(BooleanClause.Occur highFreqOccur, BooleanClause.Occur lowFreqOccur, float maxTermFrequency)
: base(highFreqOccur, lowFreqOccur, maxTermFrequency)
{
}
protected override Query NewTermQuery(Term term, TermContext context)
{
Query query = base.NewTermQuery(term, context);
if (term.Text().Equals(@"universe"))
{
query.Boost = 100F;
}
return query;
}
}
}
}