// Source-viewer residue (not part of the original file): blob 0242db91e6598645a26db82b9fa182ae98a2009c [file] [log] [blame]
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestQueryRescorer extends LuceneTestCase {
/**
 * Builds a test searcher over {@code r} and forces it to score with
 * {@link DefaultSimilarity}.
 *
 * @param r reader to search
 * @return the configured searcher
 */
private IndexSearcher getSearcher(IndexReader r) {
  final IndexSearcher configured = newSearcher(r);
  // The tests in this class depend on "more tokens = lower score":
  configured.setSimilarity(new DefaultSimilarity());
  return configured;
}
/**
 * Indexes two documents that both contain "wizard" and "oz", checks the order
 * produced by a plain BooleanQuery, then verifies that rescoring the same hits
 * with a sloppy PhraseQuery and with a SpanNearQuery each put document "1"
 * (where the two terms are adjacent) ahead of document "0".
 */
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  Document doc = w.newDocument();
  doc.addUniqueAtom("id", "0");
  doc.addLargeText("field", "wizard the the the the the oz");
  w.addDocument(doc);

  doc = w.newDocument();
  doc.addUniqueAtom("id", "1");
  // 1 extra token, but wizard and oz are close:
  doc.addLargeText("field", "wizard oz the the the the the the");
  w.addDocument(doc);

  IndexReader r = w.getReader();
  w.close();

  // Do ordinary BooleanQuery:
  BooleanQuery bq = new BooleanQuery();
  bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
  bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

  // getSearcher already installs DefaultSimilarity; no need to set it again here.
  IndexSearcher searcher = getSearcher(r);

  TopDocs hits = searcher.search(bq, 10);
  assertEquals(2, hits.totalHits);
  assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

  // Now, resort using PhraseQuery:
  PhraseQuery pq = new PhraseQuery();
  pq.setSlop(5);
  pq.add(new Term("field", "wizard"));
  pq.add(new Term("field", "oz"));

  TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);

  // Resorting changed the order:
  assertEquals(2, hits2.totalHits);
  assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));

  // Resort using SpanNearQuery:
  SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
  SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
  SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] {t1, t2}, 0, true);

  TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10);

  // Resorting changed the order:
  assertEquals(2, hits3.totalHits);
  assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id"));

  r.close();
  dir.close();
}
/**
 * Regression test for LUCENE-5682: rescoring with a query whose term does not
 * exist in the index must not fail, and since that query matches nothing the
 * first-pass order must be kept.
 */
public void testNullScorerTermQuery() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  Document doc = w.newDocument();
  doc.addUniqueAtom("id", "0");
  doc.addLargeText("field", "wizard the the the the the oz");
  w.addDocument(doc);

  doc = w.newDocument();
  doc.addUniqueAtom("id", "1");
  // 1 extra token compared to the first document:
  doc.addLargeText("field", "wizard oz the the the the the the");
  w.addDocument(doc);

  IndexReader r = w.getReader();
  w.close();

  // Do ordinary BooleanQuery:
  BooleanQuery bq = new BooleanQuery();
  bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
  bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

  // getSearcher already installs DefaultSimilarity; no need to set it again here.
  IndexSearcher searcher = getSearcher(r);

  TopDocs hits = searcher.search(bq, 10);
  assertEquals(2, hits.totalHits);
  assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

  // Now, resort using TermQuery on term that does not exist.
  TermQuery tq = new TermQuery(new Term("field", "gold"));
  TopDocs hits2 = QueryRescorer.rescore(searcher, hits, tq, 2.0, 10);

  // The null scorer is handled, and because nothing matched the second pass
  // the hits keep their first-pass order:
  assertEquals(2, hits2.totalHits);
  assertEquals("0", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(hits2.scoreDocs[1].doc).get("id"));

  r.close();
  dir.close();
}
/**
 * Rescoring through a QueryRescorer subclass whose {@code combine} subtracts the
 * second-pass score instead of adding it: the first-pass order is expected to
 * survive.
 */
public void testCustomCombine() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document current = writer.newDocument();
  current.addUniqueAtom("id", "0");
  current.addLargeText("field", "wizard the the the the the oz");
  writer.addDocument(current);

  current = writer.newDocument();
  current.addUniqueAtom("id", "1");
  // One token longer than the previous document, but "wizard" and "oz" are adjacent.
  current.addLargeText("field", "wizard oz the the the the the the");
  writer.addDocument(current);

  final IndexReader reader = writer.getReader();
  writer.close();

  // First pass: plain disjunction of the two terms.
  final BooleanQuery firstPass = new BooleanQuery();
  firstPass.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
  firstPass.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

  final IndexSearcher searcher = getSearcher(reader);
  final TopDocs firstPassHits = searcher.search(firstPass, 10);
  assertEquals(2, firstPassHits.totalHits);
  assertEquals("0", searcher.doc(firstPassHits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(firstPassHits.scoreDocs[1].doc).get("id"));

  // Second pass: sloppy phrase, combined "opposite-world" style (match lowers the score).
  final PhraseQuery phrase = new PhraseQuery();
  phrase.setSlop(5);
  phrase.add(new Term("field", "wizard"));
  phrase.add(new Term("field", "oz"));

  final QueryRescorer penalizing = new QueryRescorer(phrase) {
    @Override
    protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
      if (!secondPassMatches) {
        return firstPassScore;
      }
      return (float) (firstPassScore - 2.0 * secondPassScore);
    }
  };
  final TopDocs rescored = penalizing.rescore(searcher, firstPassHits, 10);

  // The order must be the same as after the first pass.
  assertEquals(2, rescored.totalHits);
  assertEquals("0", searcher.doc(rescored.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(rescored.scoreDocs[1].doc).get("id"));

  reader.close();
  directory.close();
}
/**
 * Rescores with an exact PhraseQuery through a custom {@code combine}, then checks
 * the text and value of {@code Rescorer.explain} for both the document that matched
 * the second pass and the one that did not.
 */
public void testExplain() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document current = writer.newDocument();
  current.addUniqueAtom("id", "0");
  current.addLargeText("field", "wizard the the the the the oz");
  writer.addDocument(current);

  current = writer.newDocument();
  current.addUniqueAtom("id", "1");
  // One token longer than the previous document, but "wizard" and "oz" are adjacent.
  current.addLargeText("field", "wizard oz the the the the the the");
  writer.addDocument(current);

  final IndexReader reader = writer.getReader();
  writer.close();

  // First pass: plain disjunction of the two terms.
  final BooleanQuery firstPass = new BooleanQuery();
  firstPass.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
  firstPass.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

  final IndexSearcher searcher = getSearcher(reader);
  final TopDocs firstPassHits = searcher.search(firstPass, 10);
  assertEquals(2, firstPassHits.totalHits);
  assertEquals("0", searcher.doc(firstPassHits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(firstPassHits.scoreDocs[1].doc).get("id"));

  // Second pass: phrase query with no slop configured.
  final PhraseQuery phrase = new PhraseQuery();
  phrase.add(new Term("field", "wizard"));
  phrase.add(new Term("field", "oz"));

  final Rescorer rescorer = new QueryRescorer(phrase) {
    @Override
    protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
      if (!secondPassMatches) {
        return firstPassScore;
      }
      return (float) (firstPassScore + 2.0 * secondPassScore);
    }
  };

  final TopDocs rescored = rescorer.rescore(searcher, firstPassHits, 10);

  // Rescoring must have swapped the two documents.
  assertEquals(2, rescored.totalHits);
  assertEquals("1", searcher.doc(rescored.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(rescored.scoreDocs[1].doc).get("id"));

  // Top hit: it matched the second pass, so a second pass score is reported.
  final ScoreDoc top = rescored.scoreDocs[0];
  Explanation firstPassExplanation = searcher.explain(firstPass, top.doc);
  Explanation combined = rescorer.explain(searcher, firstPassExplanation, top.doc);
  String text = combined.toString();
  assertTrue(text.contains("TestQueryRescorer$"));
  assertTrue(text.contains("combined first and second pass score"));
  assertTrue(text.contains("first pass score"));
  assertTrue(text.contains("= second pass score"));
  assertEquals(top.score, combined.getValue(), 0.0f);

  // Second hit: the phrase did not match, so the explanation reports a non-match.
  final ScoreDoc bottom = rescored.scoreDocs[1];
  firstPassExplanation = searcher.explain(firstPass, bottom.doc);
  combined = rescorer.explain(searcher, firstPassExplanation, bottom.doc);
  text = combined.toString();
  assertTrue(text.contains("TestQueryRescorer$"));
  assertTrue(text.contains("combined first and second pass score"));
  assertTrue(text.contains("first pass score"));
  assertTrue(text.contains("no second pass score"));
  assertFalse(text.contains("= second pass score"));
  assertTrue(text.contains("NON-MATCH"));
  assertEquals(bottom.score, combined.getValue(), 0.0f);

  reader.close();
  directory.close();
}
/**
 * Rescores with second-pass queries that use no slop (an exact PhraseQuery, then a
 * SpanNearQuery) and checks that document "1" is ranked first after each rescoring.
 */
public void testMissingSecondPassScore() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document current = writer.newDocument();
  current.addUniqueAtom("id", "0");
  current.addLargeText("field", "wizard the the the the the oz");
  writer.addDocument(current);

  current = writer.newDocument();
  current.addUniqueAtom("id", "1");
  // One token longer than the previous document, but "wizard" and "oz" are adjacent.
  current.addLargeText("field", "wizard oz the the the the the the");
  writer.addDocument(current);

  final IndexReader reader = writer.getReader();
  writer.close();

  // First pass: plain disjunction of the two terms.
  final BooleanQuery firstPass = new BooleanQuery();
  firstPass.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
  firstPass.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);

  final IndexSearcher searcher = getSearcher(reader);
  final TopDocs firstPassHits = searcher.search(firstPass, 10);
  assertEquals(2, firstPassHits.totalHits);
  assertEquals("0", searcher.doc(firstPassHits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(firstPassHits.scoreDocs[1].doc).get("id"));

  // Second pass, variant 1: PhraseQuery without slop.
  final PhraseQuery phrase = new PhraseQuery();
  phrase.add(new Term("field", "wizard"));
  phrase.add(new Term("field", "oz"));

  final TopDocs byPhrase = QueryRescorer.rescore(searcher, firstPassHits, phrase, 2.0, 10);

  // The two documents must have traded places.
  assertEquals(2, byPhrase.totalHits);
  assertEquals("1", searcher.doc(byPhrase.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(byPhrase.scoreDocs[1].doc).get("id"));

  // Second pass, variant 2: SpanNearQuery over the same two terms.
  final SpanQuery[] clauses = new SpanQuery[] {
    new SpanTermQuery(new Term("field", "wizard")),
    new SpanTermQuery(new Term("field", "oz"))
  };
  final SpanNearQuery near = new SpanNearQuery(clauses, 0, true);

  final TopDocs bySpans = QueryRescorer.rescore(searcher, firstPassHits, near, 2.0, 10);

  // Same expectation as for the phrase query.
  assertEquals(2, bySpans.totalHits);
  assertEquals("1", searcher.doc(bySpans.scoreDocs[0].doc).get("id"));
  assertEquals("0", searcher.doc(bySpans.scoreDocs[1].doc).get("id"));

  reader.close();
  directory.close();
}
/**
 * Randomized test: indexes at least 1000 documents, each with a random "num"
 * value, takes the top {@code numHits} hits of a TermQuery, and rescores them
 * with a {@link FixedScoreQuery} whose score alone decides the final order
 * ({@code combine} returns the second-pass score). The rescored order is then
 * compared against an independently sorted copy of the first-pass doc IDs.
 */
public void testRandom() throws Exception {
  Directory dir = newDirectory();
  int numDocs = atLeast(1000);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  final int[] idToNum = new int[numDocs];
  int maxValue = TestUtil.nextInt(random(), 10, 1000000);
  for (int i = 0; i < numDocs; i++) {
    Document doc = w.newDocument();
    doc.addUniqueInt("id", i);
    int numTokens = TestUtil.nextInt(random(), 1, 10);
    StringBuilder b = new StringBuilder();
    for (int j = 0; j < numTokens; j++) {
      b.append("a ");
    }
    doc.addLargeText("field", b.toString());
    idToNum[i] = random().nextInt(maxValue);
    doc.addInt("num", idToNum[i]);
    w.addDocument(doc);
  }

  final IndexReader r = w.getReader();
  w.close();

  IndexSearcher s = newSearcher(r);
  int numHits = TestUtil.nextInt(random(), 1, numDocs);
  final boolean reverse = random().nextBoolean();

  TopDocs hits = s.search(new TermQuery(new Term("field", "a")), numHits);
  assertEquals(numHits, hits.scoreDocs.length);

  TopDocs hits2 = new QueryRescorer(new FixedScoreQuery(idToNum, reverse)) {
    @Override
    protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
      // Ignore the first pass entirely so the fixed score dictates the order:
      return secondPassScore;
    }
  }.rescore(s, hits, numHits);
  assertEquals(numHits, hits2.scoreDocs.length);

  Integer[] expected = new Integer[numHits];
  for (int i = 0; i < numHits; i++) {
    expected[i] = hits.scoreDocs[i].doc;
  }

  // Expected order: by "num" (descending when reverse, ascending otherwise),
  // ties broken by ascending docID.
  Arrays.sort(expected,
      new Comparator<Integer>() {
        @Override
        public int compare(Integer a, Integer b) {
          try {
            int av = idToNum[r.document(a).getInt("id")];
            int bv = idToNum[r.document(b).getInt("id")];
            int byNum = reverse ? Integer.compare(bv, av) : Integer.compare(av, bv);
            if (byNum != 0) {
              return byNum;
            }
            // Tie break by docID, ascending (Integer.compare avoids subtraction):
            return Integer.compare(a, b);
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }
      });

  for (int i = 0; i < numHits; i++) {
    // Report the first rank that differs instead of a bare boolean failure:
    assertEquals("wrong doc at rank " + i + " (numHits=" + numHits + " reverse=" + reverse + ")",
        expected[i].intValue(), hits2.scoreDocs[i].doc);
  }

  r.close();
  dir.close();
}
/**
 * Test-only query whose scorer visits every doc ID of a segment (it ignores
 * {@code acceptDocs}) and scores each document from {@code idToNum[doc("id")]}:
 * the value itself when {@code reverse} is true, otherwise its negation.
 */
private static class FixedScoreQuery extends Query {
  private final int[] idToNum;
  private final boolean reverse;

  /**
   * @param idToNum score source, indexed by the document's "id" field
   * @param reverse if true the score is {@code idToNum[id]}, else {@code -idToNum[id]}
   */
  public FixedScoreQuery(int[] idToNum, boolean reverse) {
    this.idToNum = idToNum;
    this.reverse = reverse;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    return new Weight() {

      @Override
      public Query getQuery() {
        return FixedScoreQuery.this;
      }

      @Override
      public float getValueForNormalization() {
        return 1.0f;
      }

      @Override
      public void normalize(float queryNorm, float topLevelBoost) {
        // Scores are fixed; normalization is intentionally a no-op.
      }

      @Override
      public Scorer scorer(final LeafReaderContext context, Bits acceptDocs) throws IOException {
        // Looked up once instead of on every nextDoc() call:
        final int limit = context.reader().maxDoc();
        return new Scorer(null) {
          int docID = -1;

          @Override
          public int docID() {
            return docID;
          }

          @Override
          public int freq() {
            return 1;
          }

          @Override
          public long cost() {
            return 1;
          }

          @Override
          public int nextDoc() {
            // Once exhausted, stay on NO_MORE_DOCS so docID() agrees with the
            // value returned here.
            if (docID != NO_MORE_DOCS) {
              int next = docID + 1;
              docID = next < limit ? next : NO_MORE_DOCS;
            }
            return docID;
          }

          @Override
          public int advance(int target) {
            // A target past the end of the segment exhausts the iterator
            // rather than positioning it on a non-existent document.
            docID = target < limit ? target : NO_MORE_DOCS;
            return docID;
          }

          @Override
          public float score() throws IOException {
            int num = idToNum[context.reader().document(docID).getInt("id")];
            return reverse ? num : -num;
          }
        };
      }

      @Override
      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
        // No explanation is produced by this test query.
        return null;
      }
    };
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    // This query has no terms.
  }

  @Override
  public String toString(String field) {
    return "FixedScoreQuery " + idToNum.length + " ids; reverse=" + reverse;
  }

  @Override
  public boolean equals(Object o) {
    if ((o instanceof FixedScoreQuery) == false) {
      return false;
    }
    FixedScoreQuery other = (FixedScoreQuery) o;
    return Float.floatToIntBits(getBoost()) == Float.floatToIntBits(other.getBoost()) &&
        reverse == other.reverse &&
        Arrays.equals(idToNum, other.idToNum);
  }

  @Override
  public Query clone() {
    FixedScoreQuery copy = new FixedScoreQuery(idToNum, reverse);
    // equals() compares the boost, so the copy must carry it to stay equal to this query.
    copy.setBoost(getBoost());
    return copy;
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int hash = super.hashCode();
    if (reverse) {
      hash = prime * hash + 3623;
    }
    hash = prime * hash + Arrays.hashCode(idToNum);
    return hash;
  }
}
}