blob: b0a367a4041f0d5befe96f49f9605a1cb540a790 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
public class TestScorerPerf extends LuceneTestCase {
boolean validate = true; // set to false when doing performance testing
FixedBitSet[] sets;
Term[] terms;
IndexSearcher s;
IndexReader r;
Directory d;
// TODO: this should be setUp()....
public void createDummySearcher() throws Exception {
// Create a dummy index with nothing in it.
// This could possibly fail if Lucene starts checking for docid ranges...
d = newDirectory();
IndexWriter iw = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
iw.addDocument(new Document());
iw.close();
r = DirectoryReader.open(d);
s = newSearcher(r);
s.setQueryCache(null);
}
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
int[] freq = new int[nTerms];
terms = new Term[nTerms];
for (int i=0; i<nTerms; i++) {
int f = (nTerms+1)-i; // make first terms less frequent
freq[i] = (int)Math.ceil(Math.pow(f,power));
terms[i] = new Term("f",Character.toString((char)('A'+i)));
}
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
for (int i=0; i<nDocs; i++) {
Document d = new Document();
for (int j=0; j<nTerms; j++) {
if (random().nextInt(freq[j]) == 0) {
d.add(newStringField("f", terms[j].text(), Field.Store.NO));
//System.out.println(d);
}
}
iw.addDocument(d);
}
iw.forceMerge(1);
iw.close();
}
public FixedBitSet randBitSet(int sz, int numBitsToSet) {
FixedBitSet set = new FixedBitSet(sz);
for (int i=0; i<numBitsToSet; i++) {
set.set(random().nextInt(sz));
}
return set;
}
public FixedBitSet[] randBitSets(int numSets, int setSize) {
FixedBitSet[] sets = new FixedBitSet[numSets];
for (int i=0; i<sets.length; i++) {
sets[i] = randBitSet(setSize, random().nextInt(setSize));
}
return sets;
}
public static class CountingHitCollector extends SimpleCollector {
int count=0;
int sum=0;
protected int docBase = 0;
@Override
public void collect(int doc) {
count++;
sum += docBase + doc; // use it to avoid any possibility of being eliminated by hotspot
}
public int getCount() { return count; }
public int getSum() { return sum; }
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.COMPLETE_NO_SCORES;
}
}
public static class MatchingHitCollector extends CountingHitCollector {
FixedBitSet answer;
int pos=-1;
public MatchingHitCollector(FixedBitSet answer) {
this.answer = answer;
}
public void collect(int doc, float score) {
pos = answer.nextSetBit(pos+1);
if (pos != doc + docBase) {
throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase);
}
super.collect(doc);
}
}
private static class BitSetQuery extends Query {
private final FixedBitSet docs;
BitSetQuery(FixedBitSet docs) {
this.docs = docs;
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(docs, docs.approximateCardinality()));
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false;
}
};
}
@Override
public void visit(QueryVisitor visitor) {
}
@Override
public String toString(String field) {
return "randomBitSetFilter";
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
docs.equals(((BitSetQuery) other).docs);
}
@Override
public int hashCode() {
return 31 * classHash() + docs.hashCode();
}
}
FixedBitSet addClause(BooleanQuery.Builder bq, FixedBitSet result) {
final FixedBitSet rnd = sets[random().nextInt(sets.length)];
Query q = new BitSetQuery(rnd);
bq.add(q, BooleanClause.Occur.MUST);
if (validate) {
if (result==null) result = rnd.clone();
else result.and(rnd);
}
return result;
}
public int doConjunctions(int iter, int maxClauses) throws IOException {
int ret=0;
for (int i=0; i<iter; i++) {
int nClauses = random().nextInt(maxClauses-1)+2; // min 2 clauses
BooleanQuery.Builder bq = new BooleanQuery.Builder();
FixedBitSet result=null;
for (int j=0; j<nClauses; j++) {
result = addClause(bq,result);
}
CountingHitCollector hc = validate ? new MatchingHitCollector(result)
: new CountingHitCollector();
s.search(bq.build(), hc);
ret += hc.getSum();
if (validate) assertEquals(result.cardinality(), hc.getCount());
// System.out.println(hc.getCount());
}
return ret;
}
public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
int ret=0;
long nMatches=0;
for (int i=0; i<iter; i++) {
int oClauses = random().nextInt(maxOuterClauses-1)+2;
BooleanQuery.Builder oq = new BooleanQuery.Builder();
FixedBitSet result=null;
for (int o=0; o<oClauses; o++) {
int nClauses = random().nextInt(maxClauses-1)+2; // min 2 clauses
BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (int j=0; j<nClauses; j++) {
result = addClause(bq,result);
}
oq.add(bq.build(), BooleanClause.Occur.MUST);
} // outer
CountingHitCollector hc = validate ? new MatchingHitCollector(result)
: new CountingHitCollector();
s.search(oq.build(), hc);
nMatches += hc.getCount();
ret += hc.getSum();
if (validate) assertEquals(result.cardinality(), hc.getCount());
// System.out.println(hc.getCount());
}
if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
return ret;
}
public int doTermConjunctions(IndexSearcher s,
int termsInIndex,
int maxClauses,
int iter
) throws IOException {
int ret=0;
long nMatches=0;
for (int i=0; i<iter; i++) {
int nClauses = random().nextInt(maxClauses-1)+2; // min 2 clauses
BooleanQuery.Builder bq = new BooleanQuery.Builder();
BitSet termflag = new BitSet(termsInIndex);
for (int j=0; j<nClauses; j++) {
int tnum;
// don't pick same clause twice
tnum = random().nextInt(termsInIndex);
if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
if (tnum<0 || tnum>=termsInIndex) tnum=termflag.nextClearBit(0);
termflag.set(tnum);
Query tq = new TermQuery(terms[tnum]);
bq.add(tq, BooleanClause.Occur.MUST);
}
CountingHitCollector hc = new CountingHitCollector();
s.search(bq.build(), hc);
nMatches += hc.getCount();
ret += hc.getSum();
}
if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
return ret;
}
public int doNestedTermConjunctions(IndexSearcher s,
int termsInIndex,
int maxOuterClauses,
int maxClauses,
int iter
) throws IOException {
int ret=0;
long nMatches=0;
for (int i=0; i<iter; i++) {
int oClauses = random().nextInt(maxOuterClauses-1)+2;
BooleanQuery.Builder oq = new BooleanQuery.Builder();
for (int o=0; o<oClauses; o++) {
int nClauses = random().nextInt(maxClauses-1)+2; // min 2 clauses
BooleanQuery.Builder bq = new BooleanQuery.Builder();
BitSet termflag = new BitSet(termsInIndex);
for (int j=0; j<nClauses; j++) {
int tnum;
// don't pick same clause twice
tnum = random().nextInt(termsInIndex);
if (termflag.get(tnum)) tnum=termflag.nextClearBit(tnum);
if (tnum<0 || tnum>=25) tnum=termflag.nextClearBit(0);
termflag.set(tnum);
Query tq = new TermQuery(terms[tnum]);
bq.add(tq, BooleanClause.Occur.MUST);
} // inner
oq.add(bq.build(), BooleanClause.Occur.MUST);
} // outer
CountingHitCollector hc = new CountingHitCollector();
s.search(oq.build(), hc);
nMatches += hc.getCount();
ret += hc.getSum();
}
if (VERBOSE) System.out.println("Average number of matches="+(nMatches/iter));
return ret;
}
public int doSloppyPhrase(IndexSearcher s,
int termsInIndex,
int maxClauses,
int iter
) throws IOException {
int ret=0;
for (int i=0; i<iter; i++) {
int nClauses = random().nextInt(maxClauses-1)+2; // min 2 clauses
PhraseQuery.Builder builder = new PhraseQuery.Builder();
for (int j=0; j<nClauses; j++) {
int tnum = random().nextInt(termsInIndex);
builder.add(new Term("f", Character.toString((char)(tnum+'A'))));
}
// slop could be random too
builder.setSlop(termsInIndex);
PhraseQuery q = builder.build();
CountingHitCollector hc = new CountingHitCollector();
s.search(q, hc);
ret += hc.getSum();
}
return ret;
}
public void testConjunctions() throws Exception {
// test many small sets... the bugs will be found on boundary conditions
createDummySearcher();
validate=true;
sets=randBitSets(atLeast(1000), atLeast(10));
doConjunctions(atLeast(10000), atLeast(5));
doNestedConjunctions(atLeast(10000), atLeast(3), atLeast(3));
r.close();
d.close();
}
/***
int bigIter=10;
public void testConjunctionPerf() throws Exception {
r = newRandom();
createDummySearcher();
validate=false;
sets=randBitSets(32,1000000);
for (int i=0; i<bigIter; i++) {
long start = System.currentTimeMillis();
doConjunctions(500,6);
long end = System.currentTimeMillis();
if (VERBOSE) System.out.println("milliseconds="+(end-start));
}
s.close();
}
public void testNestedConjunctionPerf() throws Exception {
r = newRandom();
createDummySearcher();
validate=false;
sets=randBitSets(32,1000000);
for (int i=0; i<bigIter; i++) {
long start = System.currentTimeMillis();
doNestedConjunctions(500,3,3);
long end = System.currentTimeMillis();
if (VERBOSE) System.out.println("milliseconds="+(end-start));
}
s.close();
}
public void testConjunctionTerms() throws Exception {
r = newRandom();
validate=false;
RAMDirectory dir = new RAMDirectory();
if (VERBOSE) System.out.println("Creating index");
createRandomTerms(100000,25,.5, dir);
s = newSearcher(dir, true);
if (VERBOSE) System.out.println("Starting performance test");
for (int i=0; i<bigIter; i++) {
long start = System.currentTimeMillis();
doTermConjunctions(s,25,5,1000);
long end = System.currentTimeMillis();
if (VERBOSE) System.out.println("milliseconds="+(end-start));
}
s.close();
}
public void testNestedConjunctionTerms() throws Exception {
r = newRandom();
validate=false;
RAMDirectory dir = new RAMDirectory();
if (VERBOSE) System.out.println("Creating index");
createRandomTerms(100000,25,.2, dir);
s = newSearcher(dir, true);
if (VERBOSE) System.out.println("Starting performance test");
for (int i=0; i<bigIter; i++) {
long start = System.currentTimeMillis();
doNestedTermConjunctions(s,25,3,3,200);
long end = System.currentTimeMillis();
if (VERBOSE) System.out.println("milliseconds="+(end-start));
}
s.close();
}
public void testSloppyPhrasePerf() throws Exception {
r = newRandom();
validate=false;
RAMDirectory dir = new RAMDirectory();
if (VERBOSE) System.out.println("Creating index");
createRandomTerms(100000,25,2,dir);
s = newSearcher(dir, true);
if (VERBOSE) System.out.println("Starting performance test");
for (int i=0; i<bigIter; i++) {
long start = System.currentTimeMillis();
doSloppyPhrase(s,25,2,1000);
long end = System.currentTimeMillis();
if (VERBOSE) System.out.println("milliseconds="+(end-start));
}
s.close();
}
***/
}