blob: bf47fff73d481eb272f2d6bcf7b22b712f08717a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.io.StringReader;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
* Test of the DisjunctionMaxQuery.
*
*/
@LuceneTestCase.SuppressCodecs("SimpleText")
public class TestDisjunctionMaxQuery extends LuceneTestCase {
/**
* Maximum allowed difference when two scores are compared for equality in
* this test. The value is zero, so scores compared with this threshold
* must match exactly.
*/
public static final float SCORE_COMP_THRESH = 0.0000f;
/**
 * Similarity that flattens tf, idf and lengthNorm to constants so that the
 * tests below are isolated from those scoring factors.
 *
 * <p>
 * Same as TestRankingSimilarity in TestRanking.zip from
 * http://issues.apache.org/jira/browse/LUCENE-323
 * </p>
 */
private static class TestSimilarity extends ClassicSimilarity {

  public TestSimilarity() {}

  /** Returns 1 for any strictly positive frequency and 0 otherwise. */
  @Override
  public float tf(float freq) {
    return freq > 0.0f ? 1.0f : 0.0f;
  }

  /** Length normalisation is disabled: every field length yields 1. */
  @Override
  public float lengthNorm(int length) {
    return 1.0f;
  }

  /** Inverse document frequency is disabled: every term yields 1. */
  @Override
  public float idf(long docFreq, long docCount) {
    return 1.0f;
  }
}
// Similarity installed on both the index writer config and the searcher in setUp().
public Similarity sim = new TestSimilarity();
// Directory holding the fixture index; created in setUp(), closed in tearDown().
public Directory index;
// Reader over the fixture index; assigned in setUp(), closed in tearDown().
public IndexReader r;
// Searcher over r, configured with sim in setUp().
public IndexSearcher s;
// Field type copied from TextField.TYPE_STORED with tokenization switched off.
private static final FieldType nonAnalyzedType = new FieldType(TextField.TYPE_STORED);
static {
nonAnalyzedType.setTokenized(false);
}
/**
 * Builds the four-document fixture index (ids d1..d4) used by most tests,
 * merges it down to one segment and opens a searcher on it.
 *
 * <p>"hed" is the most important field, "dek" is secondary.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  index = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()))
      .setSimilarity(sim)
      .setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), index, config);

  // d1 is an "ok" match for: albino elephant
  {
    Document doc = new Document();
    doc.add(newField("id", "d1", nonAnalyzedType));
    doc.add(newTextField("hed", "elephant", Field.Store.YES));
    doc.add(newTextField("dek", "elephant", Field.Store.YES));
    writer.addDocument(doc);
  }

  // d2 is a "good" match for: albino elephant
  {
    Document doc = new Document();
    doc.add(newField("id", "d2", nonAnalyzedType));
    doc.add(newTextField("hed", "elephant", Field.Store.YES));
    doc.add(newTextField("dek", "albino", Field.Store.YES));
    doc.add(newTextField("dek", "elephant", Field.Store.YES));
    writer.addDocument(doc);
  }

  // d3 is a "better" match for: albino elephant
  {
    Document doc = new Document();
    doc.add(newField("id", "d3", nonAnalyzedType));
    doc.add(newTextField("hed", "albino", Field.Store.YES));
    doc.add(newTextField("hed", "elephant", Field.Store.YES));
    writer.addDocument(doc);
  }

  // d4 is the "best" match for: albino elephant
  {
    Document doc = new Document();
    doc.add(newField("id", "d4", nonAnalyzedType));
    doc.add(newTextField("hed", "albino", Field.Store.YES));
    // This "hed" value is added with the non-tokenized field type, unlike the others.
    doc.add(newField("hed", "elephant", nonAnalyzedType));
    doc.add(newTextField("dek", "albino", Field.Store.YES));
    writer.addDocument(doc);
  }

  // Collapse to one segment so getOnlyLeafReader() can hand back a single leaf.
  writer.forceMerge(1);
  r = getOnlyLeafReader(writer.getReader());
  writer.close();

  s = new IndexSearcher(r);
  s.setSimilarity(sim);
}
/**
 * Releases the fixture created by {@link #setUp()}: the reader first, then
 * the directory, and finally the superclass state.
 */
@Override
public void tearDown() throws Exception {
  r.close();
  index.close();

  super.tearDown();
}
/**
 * The very first {@code advance(3)} on the scorer must report
 * {@link DocIdSetIterator#NO_MORE_DOCS}: only the "id:d1" clause can match
 * anything, and the test expects no such match at or beyond doc 3.
 */
public void testSkipToFirsttimeMiss() throws IOException {
  final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
      Arrays.asList(tq("id", "d1"), tq("dek", "DOES_NOT_EXIST")), 0.0f);
  QueryUtils.check(random(), query, s);

  assertTrue(s.getTopReaderContext() instanceof LeafReaderContext);
  final Weight weight = s.createWeight(s.rewrite(query), ScoreMode.COMPLETE, 1);
  final LeafReaderContext leaf = (LeafReaderContext) s.getTopReaderContext();
  final Scorer scorer = weight.scorer(leaf);

  final int landedOn = scorer.iterator().advance(3);
  if (landedOn != DocIdSetIterator.NO_MORE_DOCS) {
    // The document is only loaded on failure, when the scorer sits on a real doc.
    fail("firsttime skipTo found a match? ... "
        + r.document(scorer.docID()).get("id"));
  }
}
/**
 * The very first {@code advance(3)} on the scorer must land on a real
 * document, and that document must be "d4".
 */
public void testSkipToFirsttimeHit() throws IOException {
  final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
      Arrays.asList(tq("dek", "albino"), tq("dek", "DOES_NOT_EXIST")), 0.0f);
  assertTrue(s.getTopReaderContext() instanceof LeafReaderContext);
  QueryUtils.check(random(), query, s);

  final Weight weight = s.createWeight(s.rewrite(query), ScoreMode.COMPLETE, 1);
  final LeafReaderContext leaf = (LeafReaderContext) s.getTopReaderContext();
  final Scorer scorer = weight.scorer(leaf);

  final int landedOn = scorer.iterator().advance(3);
  assertTrue("firsttime skipTo found no match",
      landedOn != DocIdSetIterator.NO_MORE_DOCS);
  final String foundId = r.document(scorer.docID()).get("id");
  assertEquals("found wrong docid", "d4", foundId);
}
/**
 * Disjunction over the two "hed" terms with a zero tie-breaker: all four
 * documents must match and every hit must carry the same score.
 */
public void testSimpleEqualScores1() throws Exception {
  final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
      Arrays.asList(tq("hed", "albino"), tq("hed", "elephant")),
      0.0f);
  QueryUtils.check(random(), query, s);

  final ScoreDoc[] hits = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("all docs should match " + query.toString(), 4, hits.length);

    final float expected = hits[0].score;
    int rank = 1;
    while (rank < hits.length) {
      assertEquals("score #" + rank + " is not the same", expected,
          hits[rank].score, SCORE_COMP_THRESH);
      rank++;
    }
  } catch (Error failure) {
    // Dump the ranking before propagating the assertion failure.
    printHits("testSimpleEqualScores1", hits, s);
    throw failure;
  }
}
/**
 * Disjunction over the two "dek" terms with a zero tie-breaker: three
 * documents must match and every hit must carry the same score.
 */
public void testSimpleEqualScores2() throws Exception {
  final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
      Arrays.asList(tq("dek", "albino"), tq("dek", "elephant")),
      0.0f);
  QueryUtils.check(random(), query, s);

  final ScoreDoc[] hits = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("3 docs should match " + query.toString(), 3, hits.length);

    final float expected = hits[0].score;
    int rank = 1;
    while (rank < hits.length) {
      assertEquals("score #" + rank + " is not the same", expected,
          hits[rank].score, SCORE_COMP_THRESH);
      rank++;
    }
  } catch (Error failure) {
    // Dump the ranking before propagating the assertion failure.
    printHits("testSimpleEqualScores2", hits, s);
    throw failure;
  }
}
/**
 * Disjunction over both terms in both fields with a zero tie-breaker: all
 * four documents must match and every hit must carry the same score.
 */
public void testSimpleEqualScores3() throws Exception {
  final List<Query> clauses = Arrays.asList(
      tq("hed", "albino"),
      tq("hed", "elephant"),
      tq("dek", "albino"),
      tq("dek", "elephant"));
  final DisjunctionMaxQuery query = new DisjunctionMaxQuery(clauses, 0.0f);
  QueryUtils.check(random(), query, s);

  final ScoreDoc[] hits = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("all docs should match " + query.toString(), 4, hits.length);

    final float expected = hits[0].score;
    int rank = 1;
    while (rank < hits.length) {
      assertEquals("score #" + rank + " is not the same", expected,
          hits[rank].score, SCORE_COMP_THRESH);
      rank++;
    }
  } catch (Error failure) {
    // Dump the ranking before propagating the assertion failure.
    printHits("testSimpleEqualScores3", hits, s);
    throw failure;
  }
}
/**
 * Disjunction over the two "dek" terms with a 0.01 tie-breaker: "d2" must
 * rank first with a strictly higher score, and the two remaining hits must
 * tie.
 */
public void testSimpleTiebreaker() throws Exception {
  final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
      Arrays.asList(tq("dek", "albino"), tq("dek", "elephant")),
      0.01f);
  QueryUtils.check(random(), query, s);

  final ScoreDoc[] hits = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("3 docs should match " + query.toString(), 3, hits.length);
    assertEquals("wrong first", "d2", s.doc(hits[0].doc).get("id"));

    final float top = hits[0].score;
    final float second = hits[1].score;
    final float third = hits[2].score;
    assertTrue("d2 does not have better score then others: " + top
        + " >? " + second, top > second);
    assertEquals("d4 and d1 don't have equal scores", second, third,
        SCORE_COMP_THRESH);
  } catch (Error failure) {
    // Dump the ranking before propagating the assertion failure.
    printHits("testSimpleTiebreaker", hits, s);
    throw failure;
  }
}
/**
 * Two required DisjunctionMaxQuery clauses (one per term, each spanning both
 * fields, zero tie-breaker): three documents must match, all with the same
 * score.
 */
public void testBooleanRequiredEqualScores() throws Exception {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  {
    DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "albino"), tq("dek", "albino")),
        0.0f);
    builder.add(q1, BooleanClause.Occur.MUST);
    QueryUtils.check(random(), q1, s);
  }
  {
    DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "elephant"), tq("dek", "elephant")),
        0.0f);
    builder.add(q2, BooleanClause.Occur.MUST);
    QueryUtils.check(random(), q2, s);
  }

  // Build once and reuse the instance for the check, the search and the
  // failure message, so the message shows the query rather than the builder.
  final BooleanQuery query = builder.build();
  QueryUtils.check(random(), query, s);

  ScoreDoc[] h = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("3 docs should match " + query.toString(), 3, h.length);
    float score = h[0].score;
    for (int i = 1; i < h.length; i++) {
      assertEquals("score #" + i + " is not the same", score, h[i].score,
          SCORE_COMP_THRESH);
    }
  } catch (Error e) {
    // Dump the ranking under this test's own name before rethrowing.
    printHits("testBooleanRequiredEqualScores", h, s);
    throw e;
  }
}
/**
 * Two optional DisjunctionMaxQuery clauses (one per term, each spanning both
 * fields, zero tie-breaker): all four documents must match, the first three
 * with equal scores and "d1" last with a strictly lower score.
 */
public void testBooleanOptionalNoTiebreaker() throws Exception {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  {
    DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "albino"), tq("dek", "albino")),
        0.0f);
    builder.add(q1, BooleanClause.Occur.SHOULD);
  }
  {
    DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "elephant"), tq("dek", "elephant")),
        0.0f);
    builder.add(q2, BooleanClause.Occur.SHOULD);
  }

  // Build once and reuse the instance for the check, the search and the
  // failure message, so the message shows the query rather than the builder.
  final BooleanQuery query = builder.build();
  QueryUtils.check(random(), query, s);

  ScoreDoc[] h = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("4 docs should match " + query.toString(), 4, h.length);
    float score = h[0].score;
    // The last hit is deliberately excluded: it is expected to score lower.
    for (int i = 1; i < h.length - 1; i++) {
      assertEquals("score #" + i + " is not the same", score, h[i].score,
          SCORE_COMP_THRESH);
    }
    assertEquals("wrong last", "d1", s.doc(h[h.length - 1].doc).get("id"));
    float score1 = h[h.length - 1].score;
    assertTrue("d1 does not have worse score then others: " + score + " >? "
        + score1, score > score1);
  } catch (Error e) {
    printHits("testBooleanOptionalNoTiebreaker", h, s);
    throw e;
  }
}
/**
 * Two optional DisjunctionMaxQuery clauses with a 0.01 tie-breaker. Expected
 * ranking: "d2" and "d4" tie for first (in either order), then "d3", then
 * "d1", each with a strictly lower score than the one before.
 */
public void testBooleanOptionalWithTiebreaker() throws Exception {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  {
    DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "albino"), tq("dek", "albino")),
        0.01f);
    builder.add(q1, BooleanClause.Occur.SHOULD);
  }
  {
    DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "elephant"), tq("dek", "elephant")),
        0.01f);
    builder.add(q2, BooleanClause.Occur.SHOULD);
  }

  // Build once and reuse the instance for the check, the search and the
  // failure message, so the message shows the query rather than the builder.
  final BooleanQuery query = builder.build();
  QueryUtils.check(random(), query, s);

  ScoreDoc[] h = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("4 docs should match " + query.toString(), 4, h.length);
    float score0 = h[0].score;
    float score1 = h[1].score;
    float score2 = h[2].score;
    float score3 = h[3].score;
    String doc0 = s.doc(h[0].doc).get("id");
    String doc1 = s.doc(h[1].doc).get("id");
    String doc2 = s.doc(h[2].doc).get("id");
    String doc3 = s.doc(h[3].doc).get("id");
    assertTrue("doc0 should be d2 or d4: " + doc0, doc0.equals("d2")
        || doc0.equals("d4"));
    // Report doc1 (not doc0) when the second hit is the wrong document.
    assertTrue("doc1 should be d2 or d4: " + doc1, doc1.equals("d2")
        || doc1.equals("d4"));
    assertEquals("score0 and score1 should match", score0, score1,
        SCORE_COMP_THRESH);
    assertEquals("wrong third", "d3", doc2);
    assertTrue("d3 does not have worse score then d2 and d4: " + score1
        + " >? " + score2, score1 > score2);
    assertEquals("wrong fourth", "d1", doc3);
    assertTrue("d1 does not have worse score then d3: " + score2 + " >? "
        + score3, score2 > score3);
  } catch (Error e) {
    printHits("testBooleanOptionalWithTiebreaker", h, s);
    throw e;
  }
}
/**
 * Two optional DisjunctionMaxQuery clauses with a 0.01 tie-breaker and a 1.5
 * boost on the "hed" clauses. Expected ranking is strictly d4, d3, d2, d1,
 * each with a higher score than the next.
 */
public void testBooleanOptionalWithTiebreakerAndBoost() throws Exception {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  {
    DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "albino", 1.5f), tq("dek", "albino")),
        0.01f);
    builder.add(q1, BooleanClause.Occur.SHOULD);
  }
  {
    DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(
        Arrays.asList(tq("hed", "elephant", 1.5f), tq("dek", "elephant")),
        0.01f);
    builder.add(q2, BooleanClause.Occur.SHOULD);
  }

  // Build once and reuse the instance for the check, the search and the
  // failure message, so the message shows the query rather than the builder.
  final BooleanQuery query = builder.build();
  QueryUtils.check(random(), query, s);

  ScoreDoc[] h = s.search(query, 1000).scoreDocs;
  try {
    assertEquals("4 docs should match " + query.toString(), 4, h.length);
    float score0 = h[0].score;
    float score1 = h[1].score;
    float score2 = h[2].score;
    float score3 = h[3].score;
    String doc0 = s.doc(h[0].doc).get("id");
    String doc1 = s.doc(h[1].doc).get("id");
    String doc2 = s.doc(h[2].doc).get("id");
    String doc3 = s.doc(h[3].doc).get("id");
    assertEquals("doc0 should be d4: ", "d4", doc0);
    assertEquals("doc1 should be d3: ", "d3", doc1);
    assertEquals("doc2 should be d2: ", "d2", doc2);
    assertEquals("doc3 should be d1: ", "d1", doc3);
    assertTrue("d4 does not have a better score then d3: " + score0 + " >? "
        + score1, score0 > score1);
    assertTrue("d3 does not have a better score then d2: " + score1 + " >? "
        + score2, score1 > score2);
    // This comparison is d2 (score2) against d1 (score3); the message says so.
    assertTrue("d2 does not have a better score then d1: " + score2 + " >? "
        + score3, score2 > score3);
  } catch (Error e) {
    printHits("testBooleanOptionalWithTiebreakerAndBoost", h, s);
    throw e;
  }
}
// LUCENE-4477 / LUCENE-4401:
/**
 * A DisjunctionMaxQuery over two SpanTermQuery clauses, only one of which
 * matches the single indexed document, must return exactly one hit.
 */
public void testBooleanSpanQuery() throws Exception {
  final String FIELD = "content";

  // try-with-resources closes the writer, reader and directory even when an
  // assertion fails, and closes the reader before the directory.
  try (Directory directory = newDirectory()) {
    Analyzer indexerAnalyzer = new MockAnalyzer(random());
    IndexWriterConfig config = new IndexWriterConfig(indexerAnalyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
      Document d = new Document();
      d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
      writer.addDocument(d);
    }

    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = newSearcher(indexReader);
      DisjunctionMaxQuery query = new DisjunctionMaxQuery(
          Arrays.asList(
              new SpanTermQuery(new Term(FIELD, "clockwork")),
              new SpanTermQuery(new Term(FIELD, "clckwork"))),
          1.0f);
      TopScoreDocCollector collector = TopScoreDocCollector.create(1000, Integer.MAX_VALUE);
      searcher.search(query, collector);

      // topDocs() may only be called once per search, so its result is kept.
      // The old loop that printed doc ids from a second topDocs() call is
      // dropped: that second call returned no hits.
      final ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
      assertEquals(1, scoreDocs.length);
    }
  }
}
/**
 * A DisjunctionMaxQuery with a tie-breaker of 1.0 must rewrite to a
 * BooleanQuery holding the same sub-queries, in order, as SHOULD clauses.
 */
public void testRewriteBoolean() throws Exception {
  Query sub1 = tq("hed", "albino");
  Query sub2 = tq("hed", "elephant");
  DisjunctionMaxQuery q = new DisjunctionMaxQuery(
      Arrays.asList(sub1, sub2), 1.0f);
  Query rewritten = s.rewrite(q);
  assertTrue(rewritten instanceof BooleanQuery);
  BooleanQuery bq = (BooleanQuery) rewritten;
  // Expected value goes first so failure messages read the right way round.
  assertEquals(2, bq.clauses().size());
  assertEquals(new BooleanClause(sub1, BooleanClause.Occur.SHOULD), bq.clauses().get(0));
  assertEquals(new BooleanClause(sub2, BooleanClause.Occur.SHOULD), bq.clauses().get(1));
}
/**
 * A DisjunctionMaxQuery with no clauses must rewrite to a query equal to
 * {@link MatchNoDocsQuery}.
 */
public void testRewriteEmpty() throws Exception {
  final DisjunctionMaxQuery emptyQuery =
      new DisjunctionMaxQuery(Collections.emptyList(), 0.0f);
  final Query actual = s.rewrite(emptyQuery);
  assertEquals(new MatchNoDocsQuery(), actual);
}
/**
 * Runs {@code doTestRandomTopDocs} over a fixed table of per-field values,
 * one row per run; the number of fields is the row length. The last row is
 * intentionally listed twice, so that configuration runs twice.
 */
public void testRandomTopDocs() throws Exception {
  final double[][] runs = {
      {0.05f, 0.05f},
      {1.0f, 0.05f},
      {1.0f, 0.5f, 0.05f},
      {1.0f, 0.5f, 0.05f, 0f},
      {1.0f, 0.5f, 0.05f, 0f},
  };
  for (double[] freqs : runs) {
    doTestRandomTopDocs(freqs.length, freqs);
  }
}
/**
* Indexes random documents with {@code numFields} text fields named "0",
* "1", ... and then runs {@code CheckHits.checkTopScores} on four randomly
* built DisjunctionMaxQuery instances over the term "a" in those fields.
* Each query is checked on its own and as a MUST clause next to a FILTER
* on "field:c".
*
* @param numFields number of text fields per document; must equal {@code freqs.length}
* @param freqs one value per field, compared against a random double when
*        deciding how many "a" tokens the field gets
* @throws IOException if indexing or searching fails
*/
private void doTestRandomTopDocs(int numFields, double... freqs) throws IOException {
assert numFields == freqs.length;
Directory dir = newDirectory();
IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
IndexWriter w = new IndexWriter(dir, config);
int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100); // at night, make sure some terms have skip data
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
for (int j = 0; j < numFields; j++) {
StringBuilder builder = new StringBuilder();
// With probability freqs[j] the field gets no "a" at all; otherwise 1 to 5 of them.
// NOTE(review): despite the name, a larger freqs[j] makes "a" rarer in field j - confirm intent.
int numAs = random().nextDouble() < freqs[j] ? 0 : 1 + random().nextInt(5);
for (int k = 0; k < numAs; k++) {
if (builder.length() > 0) {
builder.append(' ');
}
builder.append('a');
}
// This coin flip is inside the per-field loop, so a document may get
// the "field":"c" value added more than once.
if (random().nextBoolean()) {
doc.add(new StringField("field", "c", Field.Store.NO));
}
// Pad the field with 0, or 1 to 5, random integer tokens.
int numOthers = random().nextBoolean() ? 0 : 1 + random().nextInt(5);
for (int k = 0; k < numOthers; k++) {
if (builder.length() > 0) {
builder.append(' ');
}
builder.append(Integer.toString(random().nextInt()));
}
// The field name is the field index; the content is supplied through a Reader.
doc.add(new TextField(Integer.toString(j), new StringReader(builder.toString())));
}
w.addDocument(doc);
}
// Open the reader from the writer before the writer is closed.
IndexReader reader = DirectoryReader.open(w);
w.close();
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < 4; i++) {
List<Query> clauses = new ArrayList<>();
for (int j = 0; j < numFields; j++) {
// Odd rounds use plain term queries; even rounds may add a random boost.
if (i % 2 == 1) {
clauses.add(tq(Integer.toString(j), "a"));
} else {
float boost = random().nextBoolean() ? 0 : random().nextFloat();
// A boost of 0 (not drawn, or drawn as exactly 0) falls back to the unboosted query.
if (boost > 0) {
clauses.add(tq(Integer.toString(j), "a", boost));
} else {
clauses.add(tq(Integer.toString(j), "a"));
}
}
}
float tieBreaker = random().nextFloat();
Query query = new DisjunctionMaxQuery(clauses, tieBreaker);
CheckHits.checkTopScores(random(), query, searcher);
// Same disjunction again, now as a MUST clause next to a FILTER on "field:c".
query = new BooleanQuery.Builder()
.add(new DisjunctionMaxQuery(clauses, tieBreaker), BooleanClause.Occur.MUST)
.add(tq("field", "c"), BooleanClause.Occur.FILTER)
.build();
CheckHits.checkTopScores(random(), query, searcher);
}
reader.close();
dir.close();
}
/**
 * Shorthand for a {@link TermQuery} on a single term.
 *
 * @param f field name
 * @param t term text
 * @return a TermQuery for term {@code t} in field {@code f}
 */
protected Query tq(String f, String t) {
  final Term term = new Term(f, t);
  return new TermQuery(term);
}
/**
 * Shorthand for a boosted term query.
 *
 * @param f field name
 * @param t term text
 * @param b boost handed to the wrapping {@link BoostQuery}
 * @return the query from {@code tq(f, t)} wrapped in a BoostQuery
 */
protected Query tq(String f, String t, float b) {
  return new BoostQuery(tq(f, t), b);
}
/**
 * Writes a ranked listing of hits to {@code System.err}: a header line with
 * the test name, then one line per hit giving its position, its score
 * formatted to nine decimals, and the stored "id" of the document.
 *
 * @param test label printed in the header line
 * @param h hits to print, in the order given
 * @param searcher searcher used to load each hit's stored fields
 */
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher)
    throws Exception {
  System.err.println("------- " + test + " -------");

  final DecimalFormatSymbols rootSymbols = DecimalFormatSymbols.getInstance(Locale.ROOT);
  final DecimalFormat scoreFormat = new DecimalFormat("0.000000000", rootSymbols);

  int position = 0;
  for (ScoreDoc hit : h) {
    final Document stored = searcher.doc(hit.doc);
    final String formattedScore = scoreFormat.format(hit.score);
    System.err.println("#" + position + ": " + formattedScore + " - " + stored.get("id"));
    position++;
  }
}
}