// blob: a5efe87e9a9a2dd5321e6e3f4a404d8c656b49f0 [file] [log] [blame]
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Support;
using NUnit.Framework;
using System;
using System.Collections;
using Assert = Lucene.Net.TestFramework.Assert;
using BitSet = J2N.Collections.BitSet;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using Directory = Lucene.Net.Store.Directory;
using DirectoryReader = Lucene.Net.Index.DirectoryReader;
using DocIdBitSet = Lucene.Net.Util.DocIdBitSet;
using Document = Documents.Document;
using Field = Field;
using IBits = Lucene.Net.Util.IBits;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using OpenMode = Lucene.Net.Index.OpenMode;
using Term = Lucene.Net.Index.Term;
[TestFixture]
public class TestScorerPerf : LuceneTestCase
{
internal bool validate = true; // set to false when doing performance testing
internal BitSet[] sets;
internal Term[] terms;
internal IndexSearcher s;
internal IndexReader r;
internal Directory d;
// TODO: this should be setUp()....
/// <summary>
/// Creates a fresh directory containing a single empty document, then opens
/// the fixture's reader (<c>r</c>) and searcher (<c>s</c>) on top of it.
/// </summary>
public virtual void CreateDummySearcher()
{
    // The index holds exactly one empty document.
    // This could possibly fail if Lucene starts checking for docid ranges...
    d = NewDirectory();
    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    var writer = new IndexWriter(d, config);
    writer.AddDocument(new Document());
    writer.Dispose();

    r = DirectoryReader.Open(d);
    s = NewSearcher(r);
}
/// <summary>
/// Fills <paramref name="dir"/> with <paramref name="nDocs"/> documents whose field "f"
/// contains a random subset of <paramref name="nTerms"/> single-character terms
/// ("A", "B", ...), and stores those terms in the <c>terms</c> field.
/// </summary>
/// <param name="nDocs">Number of documents to add.</param>
/// <param name="nTerms">Number of distinct terms to generate.</param>
/// <param name="power">Exponent applied to each term's rank to obtain its divisor.</param>
/// <param name="dir">Directory that receives the index (opened with <c>OpenMode.CREATE</c>).</param>
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
{
    int[] divisors = new int[nTerms];
    terms = new Term[nTerms];
    for (int t = 0; t < nTerms; t++)
    {
        // Earlier terms get a larger divisor, which makes them less frequent.
        double weight = Math.Pow((nTerms + 1) - t, power);
        divisors[t] = (int)Math.Ceiling(weight);
        terms[t] = new Term("f", char.ToString((char)('A' + t)));
    }

    var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE);
    var writer = new IndexWriter(dir, config);
    for (int docNum = 0; docNum < nDocs; docNum++)
    {
        Document doc = new Document();
        for (int t = 0; t < nTerms; t++)
        {
            // A term is added only when the draw over its divisor comes up 0.
            if (Random.Next(divisors[t]) != 0)
            {
                continue;
            }
            doc.Add(NewStringField("f", terms[t].Text(), Field.Store.NO));
        }
        writer.AddDocument(doc);
    }

    writer.ForceMerge(1);
    writer.Dispose();
}
/// <summary>
/// Creates a bit set sized for <paramref name="sz"/> bits and sets
/// <paramref name="numBitsToSet"/> randomly chosen positions in it.
/// The same position may be drawn more than once, so fewer distinct bits can end up set.
/// </summary>
/// <param name="sz">Exclusive upper bound for the random bit positions.</param>
/// <param name="numBitsToSet">Number of random draws to perform.</param>
/// <returns>The populated bit set.</returns>
public virtual BitSet RandBitSet(int sz, int numBitsToSet)
{
    BitSet bits = new BitSet(sz);
    for (int remaining = numBitsToSet; remaining > 0; remaining--)
    {
        int position = Random.Next(sz);
        bits.Set(position);
    }
    return bits;
}
/// <summary>
/// Builds <paramref name="numSets"/> random bit sets, each produced by
/// <c>RandBitSet</c> with a random number of draws below <paramref name="setSize"/>.
/// </summary>
/// <param name="numSets">Number of bit sets to generate.</param>
/// <param name="setSize">Size passed to every generated bit set.</param>
/// <returns>The array of generated bit sets.</returns>
public virtual BitSet[] RandBitSets(int numSets, int setSize)
{
    BitSet[] generated = new BitSet[numSets];
    for (int index = 0; index < generated.Length; index++)
    {
        int draws = Random.Next(setSize);
        generated[index] = RandBitSet(setSize, draws);
    }
    return generated;
}
/// <summary>
/// Collector that counts the hits it receives and accumulates the sum of
/// <c>docBase + doc</c> over all of them.
/// </summary>
public class CountingHitCollector : ICollector
{
    internal int count;
    internal int sum;
    protected internal int docBase;

    /// <summary>Number of documents collected so far.</summary>
    public virtual int Count
    {
        get { return count; }
    }

    /// <summary>Running sum of <c>docBase + doc</c> for every collected document.</summary>
    public virtual int Sum
    {
        get { return sum; }
    }

    /// <summary>Always <c>true</c>.</summary>
    public virtual bool AcceptsDocsOutOfOrder
    {
        get { return true; }
    }

    /// <summary>Does nothing; the scorer is not used.</summary>
    public virtual void SetScorer(Scorer scorer)
    {
    }

    /// <summary>Remembers the doc base of the reader context that is about to be collected.</summary>
    public virtual void SetNextReader(AtomicReaderContext context)
    {
        docBase = context.DocBase;
    }

    /// <summary>Records one hit.</summary>
    public virtual void Collect(int doc)
    {
        count += 1;
        // Fold the doc id into a result so the work cannot be optimized away.
        sum += doc + docBase;
    }
}
/// <summary>
/// A <see cref="CountingHitCollector"/> that can additionally check that hits arrive
/// in exactly the order of the set bits of an expected <see cref="BitSet"/>.
/// </summary>
public class MatchingHitCollector : CountingHitCollector
{
    internal BitSet answer;
    internal int pos = -1;

    /// <param name="answer">Bit set whose set bits are the expected document ids.</param>
    public MatchingHitCollector(BitSet answer)
    {
        this.answer = answer;
    }

    /// <summary>
    /// Advances to the next expected document and verifies that it equals
    /// <paramref name="doc"/> plus the current doc base, then counts the hit.
    /// </summary>
    /// <param name="doc">Document id as passed by the caller; <c>docBase</c> is added before comparing.</param>
    /// <param name="score">Unused.</param>
    /// <exception cref="Exception">The collected document is not the next expected one.</exception>
    // NOTE(review): this two-argument overload does not override Collect(int) from
    // CountingHitCollector, so callers that only invoke Collect(int) never run this
    // check - confirm whether that is intended.
    public virtual void Collect(int doc, float score)
    {
        pos = answer.NextSetBit(pos + 1);
        int actual = doc + docBase;
        if (pos != actual)
        {
            // Report the summed id; appending doc and docBase separately would
            // print them as adjacent digits (e.g. "30" for doc 3, base 0).
            throw new Exception("Expected doc " + pos + " but got " + actual);
        }
        base.Collect(doc);
    }
}
/// <summary>
/// Adds one MUST clause to <paramref name="bq"/>, backed by a randomly chosen entry of
/// <c>sets</c>, and (when <c>validate</c> is on) folds that set into the expected result.
/// </summary>
/// <param name="bq">Query that receives the new clause.</param>
/// <param name="result">Expected matches accumulated so far, or <c>null</c> for the first clause.</param>
/// <returns>
/// The updated expected matches; <paramref name="result"/> is returned unchanged
/// when validation is off.
/// </returns>
internal virtual BitSet AddClause(BooleanQuery bq, BitSet result)
{
    BitSet picked = sets[Random.Next(sets.Length)];
    Filter filter = new FilterAnonymousInnerClassHelper(picked);
    bq.Add(new ConstantScoreQuery(filter), Occur.MUST);

    if (!validate)
    {
        return result;
    }

    if (result == null)
    {
        // First clause: start from a copy so the shared set is never modified.
        return (BitSet)picked.Clone();
    }

    result.And(picked);
    return result;
}
/// <summary>
/// Filter that returns the wrapped bit set as its doc id set for every reader context.
/// </summary>
private class FilterAnonymousInnerClassHelper : Filter
{
    private readonly BitSet bits;

    public FilterAnonymousInnerClassHelper(BitSet rnd)
    {
        bits = rnd;
    }

    /// <summary>
    /// Asserts that no <paramref name="acceptDocs"/> were supplied and wraps the bit set
    /// in a <see cref="DocIdBitSet"/>.
    /// </summary>
    public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
    {
        Assert.IsNull(acceptDocs, "acceptDocs should be null, as we have an index without deletions");
        DocIdSet matches = new DocIdBitSet(bits);
        return matches;
    }
}
/// <summary>
/// Runs <paramref name="iter"/> random conjunctions of bit-set-backed clauses against
/// the fixture searcher and, when <c>validate</c> is on, checks the hit count against
/// the intersection of the chosen sets.
/// </summary>
/// <param name="iter">Number of queries to run.</param>
/// <param name="maxClauses">Bound used when drawing the clause count (at least two clauses are always used).</param>
/// <returns>Sum of the collectors' <c>Sum</c> values over all queries.</returns>
public virtual int DoConjunctions(int iter, int maxClauses)
{
    int total = 0;
    for (int round = 0; round < iter; round++)
    {
        int clauseCount = 2 + Random.Next(maxClauses - 1); // never fewer than two clauses
        BooleanQuery conjunction = new BooleanQuery();
        BitSet expected = null;
        for (int remaining = clauseCount; remaining > 0; remaining--)
        {
            expected = AddClause(conjunction, expected);
        }

        CountingHitCollector collector;
        if (validate)
        {
            collector = new MatchingHitCollector(expected);
        }
        else
        {
            collector = new CountingHitCollector();
        }

        s.Search(conjunction, collector);
        total += collector.Sum;

        if (validate)
        {
            Assert.AreEqual(expected.Cardinality, collector.Count);
        }
    }
    return total;
}
/// <summary>
/// Like <c>DoConjunctions</c>, but every query is an outer conjunction whose clauses are
/// themselves conjunctions of bit-set-backed clauses.
/// </summary>
/// <param name="iter">Number of queries to run.</param>
/// <param name="maxOuterClauses">Bound used when drawing the number of outer clauses (minimum two).</param>
/// <param name="maxClauses">Bound used when drawing the number of clauses per inner query (minimum two).</param>
/// <returns>Sum of the collectors' <c>Sum</c> values over all queries.</returns>
public virtual int DoNestedConjunctions(int iter, int maxOuterClauses, int maxClauses)
{
    int total = 0;
    long matchCount = 0;
    for (int round = 0; round < iter; round++)
    {
        int outerCount = 2 + Random.Next(maxOuterClauses - 1);
        BooleanQuery outer = new BooleanQuery();
        BitSet expected = null;
        for (int outerIndex = 0; outerIndex < outerCount; outerIndex++)
        {
            int innerCount = 2 + Random.Next(maxClauses - 1); // never fewer than two clauses
            BooleanQuery inner = new BooleanQuery();
            for (int remaining = innerCount; remaining > 0; remaining--)
            {
                expected = AddClause(inner, expected);
            }
            outer.Add(inner, Occur.MUST);
        }

        CountingHitCollector collector;
        if (validate)
        {
            collector = new MatchingHitCollector(expected);
        }
        else
        {
            collector = new CountingHitCollector();
        }

        s.Search(outer, collector);
        matchCount += collector.Count;
        total += collector.Sum;

        if (validate)
        {
            Assert.AreEqual(expected.Cardinality, collector.Count);
        }
    }

    if (Verbose)
    {
        Console.WriteLine("Average number of matches=" + (matchCount / iter));
    }
    return total;
}
/// <summary>
/// Runs <paramref name="iter"/> random conjunctions of term queries, built from the
/// <c>terms</c> field, against <paramref name="s"/>.
/// </summary>
/// <param name="s">Searcher to query.</param>
/// <param name="termsInIndex">Number of entries of <c>terms</c> to choose from.</param>
/// <param name="maxClauses">Bound used when drawing the clause count (minimum two).</param>
/// <param name="iter">Number of queries to run.</param>
/// <returns>Sum of the collectors' <c>Sum</c> values over all queries.</returns>
public virtual int DoTermConjunctions(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
{
    int total = 0;
    long matchCount = 0;
    for (int round = 0; round < iter; round++)
    {
        int clauseCount = 2 + Random.Next(maxClauses - 1); // never fewer than two clauses
        BooleanQuery conjunction = new BooleanQuery();
        BitArray used = new BitArray(termsInIndex);
        for (int clause = 0; clause < clauseCount; clause++)
        {
            // Avoid picking the same term twice: move on to the next unused one,
            // wrapping around to the start when the search runs off the end.
            int candidate = Random.Next(termsInIndex);
            if (used.SafeGet(candidate))
            {
                candidate = used.NextClearBit(candidate);
            }
            if (!(candidate >= 0 && candidate < termsInIndex))
            {
                candidate = used.NextClearBit(0);
            }
            used.SafeSet(candidate, true);

            Query termQuery = new TermQuery(terms[candidate]);
            conjunction.Add(termQuery, Occur.MUST);
        }

        CountingHitCollector collector = new CountingHitCollector();
        s.Search(conjunction, collector);
        matchCount += collector.Count;
        total += collector.Sum;
    }

    if (Verbose)
    {
        Console.WriteLine("Average number of matches=" + (matchCount / iter));
    }
    return total;
}
/// <summary>
/// Runs <paramref name="iter"/> random nested conjunctions against <paramref name="s"/>:
/// each query is an outer conjunction whose clauses are conjunctions of term queries
/// built from the <c>terms</c> field.
/// </summary>
/// <param name="s">Searcher to query.</param>
/// <param name="termsInIndex">Number of entries of <c>terms</c> to choose from.</param>
/// <param name="maxOuterClauses">Bound used when drawing the number of outer clauses (minimum two).</param>
/// <param name="maxClauses">Bound used when drawing the number of term clauses per inner query (minimum two).</param>
/// <param name="iter">Number of queries to run.</param>
/// <returns>Sum of the collectors' <c>Sum</c> values over all queries.</returns>
public virtual int DoNestedTermConjunctions(IndexSearcher s, int termsInIndex, int maxOuterClauses, int maxClauses, int iter)
{
    int ret = 0;
    long nMatches = 0;
    for (int i = 0; i < iter; i++)
    {
        int oClauses = Random.Next(maxOuterClauses - 1) + 2;
        BooleanQuery oq = new BooleanQuery();
        for (int o = 0; o < oClauses; o++)
        {
            int nClauses = Random.Next(maxClauses - 1) + 2; // min 2 clauses
            BooleanQuery bq = new BooleanQuery();
            BitArray termflag = new BitArray(termsInIndex);
            for (int j = 0; j < nClauses; j++)
            {
                int tnum;
                // don't pick same clause twice
                tnum = Random.Next(termsInIndex);
                if (termflag.SafeGet(tnum))
                {
                    tnum = termflag.NextClearBit(tnum);
                }
                // Wrap around when the search ran past the last term. The bound must be
                // termsInIndex (not a fixed 25) so that other index sizes work too.
                if (tnum < 0 || tnum >= termsInIndex)
                {
                    tnum = termflag.NextClearBit(0);
                }
                termflag.SafeSet(tnum, true);
                Query tq = new TermQuery(terms[tnum]);
                bq.Add(tq, Occur.MUST);
            } // inner
            oq.Add(bq, Occur.MUST);
        } // outer
        CountingHitCollector hc = new CountingHitCollector();
        s.Search(oq, hc);
        nMatches += hc.Count;
        ret += hc.Sum;
    }
    if (Verbose)
    {
        Console.WriteLine("Average number of matches=" + (nMatches / iter));
    }
    return ret;
}
/// <summary>
/// Runs <paramref name="iter"/> random sloppy phrase queries over field "f" against
/// <paramref name="s"/>, using single-character terms starting at 'A'.
/// </summary>
/// <param name="s">Searcher to query.</param>
/// <param name="termsInIndex">Number of distinct terms to draw from; also used as the phrase slop.</param>
/// <param name="maxClauses">Bound used when drawing the number of phrase terms (minimum two).</param>
/// <param name="iter">Number of queries to run.</param>
/// <returns>Sum of the collectors' <c>Sum</c> values over all queries.</returns>
public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
{
    int total = 0;
    for (int round = 0; round < iter; round++)
    {
        int termCount = 2 + Random.Next(maxClauses - 1); // never fewer than two terms
        PhraseQuery phrase = new PhraseQuery();
        for (int position = 0; position < termCount; position++)
        {
            char letter = (char)(Random.Next(termsInIndex) + 'A');
            phrase.Add(new Term("f", char.ToString(letter)), position);
        }
        phrase.Slop = termsInIndex; // this could be random too

        CountingHitCollector collector = new CountingHitCollector();
        s.Search(phrase, collector);
        total += collector.Sum;
    }
    return total;
}
/// <summary>
/// Validates flat and nested conjunctions of bit-set-backed clauses over many small
/// random sets, then releases the reader and directory created for the test.
/// </summary>
[Test]
public virtual void TestConjunctions()
{
    try
    {
        // test many small sets... the bugs will be found on boundary conditions
        CreateDummySearcher();
        validate = true;
        sets = RandBitSets(AtLeast(1000), AtLeast(10));
        DoConjunctions(AtLeast(10000), AtLeast(5));
        DoNestedConjunctions(AtLeast(10000), AtLeast(3), AtLeast(3));
    }
    finally
    {
        // Release the reader and directory even when an assertion or search fails;
        // either may still be null if CreateDummySearcher did not complete.
        try
        {
            r?.Dispose();
        }
        finally
        {
            d?.Dispose();
        }
    }
}
/*
///*
/// int bigIter=10;
///
/// public void testConjunctionPerf() throws Exception {
/// r = newRandom();
/// createDummySearcher();
/// validate=false;
/// sets=randBitSets(32,1000000);
/// for (int i=0; i<bigIter; i++) {
/// long start = DateTime.Now.Millisecond;
/// doConjunctions(500,6);
/// long end = DateTime.Now.Millisecond;
/// if (VERBOSE) System.out.println("milliseconds="+(end-start));
/// }
/// s.Dispose();
/// }
///
/// public void testNestedConjunctionPerf() throws Exception {
/// r = newRandom();
/// createDummySearcher();
/// validate=false;
/// sets=randBitSets(32,1000000);
/// for (int i=0; i<bigIter; i++) {
/// long start = DateTime.Now.Millisecond;
/// doNestedConjunctions(500,3,3);
/// long end = DateTime.Now.Millisecond;
/// if (VERBOSE) System.out.println("milliseconds="+(end-start));
/// }
/// s.Dispose();
/// }
///
///
/// public void testConjunctionTerms() throws Exception {
/// r = newRandom();
/// validate=false;
/// RAMDirectory dir = new RAMDirectory();
/// if (VERBOSE) System.out.println("Creating index");
/// createRandomTerms(100000,25,.5, dir);
/// s = NewSearcher(dir, true);
/// if (VERBOSE) System.out.println("Starting performance test");
/// for (int i=0; i<bigIter; i++) {
/// long start = DateTime.Now.Millisecond;
/// doTermConjunctions(s,25,5,1000);
/// long end = DateTime.Now.Millisecond;
/// if (VERBOSE) System.out.println("milliseconds="+(end-start));
/// }
/// s.Dispose();
/// }
///
/// public void testNestedConjunctionTerms() throws Exception {
/// r = newRandom();
/// validate=false;
/// RAMDirectory dir = new RAMDirectory();
/// if (VERBOSE) System.out.println("Creating index");
/// createRandomTerms(100000,25,.2, dir);
/// s = NewSearcher(dir, true);
/// if (VERBOSE) System.out.println("Starting performance test");
/// for (int i=0; i<bigIter; i++) {
/// long start = DateTime.Now.Millisecond;
/// doNestedTermConjunctions(s,25,3,3,200);
/// long end = DateTime.Now.Millisecond;
/// if (VERBOSE) System.out.println("milliseconds="+(end-start));
/// }
/// s.Dispose();
/// }
///
///
/// public void testSloppyPhrasePerf() throws Exception {
/// r = newRandom();
/// validate=false;
/// RAMDirectory dir = new RAMDirectory();
/// if (VERBOSE) System.out.println("Creating index");
/// createRandomTerms(100000,25,2,dir);
/// s = NewSearcher(dir, true);
/// if (VERBOSE) System.out.println("Starting performance test");
/// for (int i=0; i<bigIter; i++) {
/// long start = DateTime.Now.Millisecond;
/// doSloppyPhrase(s,25,2,1000);
/// long end = DateTime.Now.Millisecond;
/// if (VERBOSE) System.out.println("milliseconds="+(end-start));
/// }
/// s.Dispose();
/// }
/// **
*/
}
}