/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
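
/** Tests for {@link SynonymQuery}: equality, builder validation, scoring with per-term boosts, and impact merging. */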
public class TestSynonymQuery extends LuceneTestCase {
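
  /** Basic equality checks; equal queries must compare equal regardless of term insertion order. */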
  public void testEquals() {
    QueryUtils.checkEqual(
        new SynonymQuery.Builder("foo").build(), new SynonymQuery.Builder("foo").build());
    QueryUtils.checkEqual(
        new SynonymQuery.Builder("foo").addTerm(new Term("foo", "bar")).build(),
        new SynonymQuery.Builder("foo").addTerm(new Term("foo", "bar")).build());
    QueryUtils.checkEqual(
        new SynonymQuery.Builder("a").addTerm(new Term("a", "a")).addTerm(new Term("a", "b")).build(),
        new SynonymQuery.Builder("a").addTerm(new Term("a", "b")).addTerm(new Term("a", "a")).build());
    QueryUtils.checkEqual(
        new SynonymQuery.Builder("field")
            .addTerm(new Term("field", "b"), 0.4f)
            .addTerm(new Term("field", "c"), 0.2f)
            .addTerm(new Term("field", "d"))
            .build(),
        new SynonymQuery.Builder("field")
            .addTerm(new Term("field", "b"), 0.4f)
            .addTerm(new Term("field", "c"), 0.2f)
            .addTerm(new Term("field", "d"))
            .build());
  }
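
  /**
   * The builder rejects terms whose field differs from the query field, as well as boosts that are
   * not finite values in {@code (0, 1]}.
   */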
  public void testBogusParams() {
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a")).addTerm(new Term("field2", "b"));
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), 1.3f);
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), Float.NaN);
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), Float.POSITIVE_INFINITY);
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), Float.NEGATIVE_INFINITY);
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), -0.3f);
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), 0f);
    });
    expectThrows(IllegalArgumentException.class, () -> {
      new SynonymQuery.Builder("field1").addTerm(new Term("field1", "a"), -0f);
    });
  }
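
  /** toString renders the query as {@code Synonym(field:term ...)}. */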
  public void testToString() {
    assertEquals("Synonym()", new SynonymQuery.Builder("foo").build().toString());
    Term t1 = new Term("foo", "bar");
    assertEquals("Synonym(foo:bar)",
        new SynonymQuery.Builder("foo").addTerm(t1).build().toString());
    Term t2 = new Term("foo", "baz");
    assertEquals("Synonym(foo:bar foo:baz)",
        new SynonymQuery.Builder("foo").addTerm(t1).addTerm(t2).build().toString());
  }
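
  /**
   * Documents matching either synonym term must get the same score, both when hits are counted
   * exactly and when the collector is allowed to stop counting early.
   */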
  public void testScores() throws IOException {
    doTestScores(2);
    doTestScores(Integer.MAX_VALUE);
  }
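
  /**
   * Indexes one document containing "a" and ten containing "b", then checks that all 11 hits of the
   * two-term synonym query score identically.
   */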
  private void doTestScores(int totalHitsThreshold) throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StringField("f", "a", Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("f", "b", Store.NO));
    for (int i = 0; i < 10; ++i) {
      w.addDocument(doc);
    }
    float boost = random().nextBoolean() ? random().nextFloat() : 1f;
    IndexReader reader = w.getReader();
    IndexSearcher searcher = newSearcher(reader);
    SynonymQuery query = new SynonymQuery.Builder("f")
        .addTerm(new Term("f", "a"), boost == 0 ? 1f : boost)
        .addTerm(new Term("f", "b"), boost == 0 ? 1f : boost)
        .build();

    TopScoreDocCollector collector =
        TopScoreDocCollector.create(Math.min(reader.numDocs(), totalHitsThreshold), null, totalHitsThreshold);
    searcher.search(query, collector);
    TopDocs topDocs = collector.topDocs();
    if (topDocs.totalHits.value < totalHitsThreshold) {
      assertEquals(new TotalHits(11, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);
    } else {
      assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
    }
    // All docs must have the same score
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
      assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
    }

    reader.close();
    w.close();
    dir.close();
  }
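
  /**
   * Per-term boosts: documents whose boosted term frequencies are equal must score identically,
   * with exact and with early-terminated hit counting.
   */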
  public void testBoosts() throws IOException {
    doTestBoosts(2);
    doTestBoosts(Integer.MAX_VALUE);
  }
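
  /**
   * Every indexed document is expected to score the same: freq("a")=4 at boost 0.25, freq("b")=2 at
   * boost 0.5, freq("c")=1 at boost 1, and the mixed "a a b" document all yield a boosted frequency
   * of 1 (norms are omitted).
   */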
  public void doTestBoosts(int totalHitsThreshold) throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setOmitNorms(true);
    doc.add(new Field("f", "c", ft));
    w.addDocument(doc);
    for (int i = 0; i < 10; ++i) {
      doc.clear();
      doc.add(new Field("f", "a a a a", ft));
      w.addDocument(doc);
      if (i % 2 == 0) {
        doc.clear();
        doc.add(new Field("f", "b b", ft));
        w.addDocument(doc);
      } else {
        doc.clear();
        doc.add(new Field("f", "a a b", ft));
        w.addDocument(doc);
      }
    }
    doc.clear();
    doc.add(new Field("f", "c", ft));
    w.addDocument(doc);

    IndexReader reader = w.getReader();
    IndexSearcher searcher = newSearcher(reader);
    SynonymQuery query = new SynonymQuery.Builder("f")
        .addTerm(new Term("f", "a"), 0.25f)
        .addTerm(new Term("f", "b"), 0.5f)
        .addTerm(new Term("f", "c"))
        .build();

    TopScoreDocCollector collector =
        TopScoreDocCollector.create(Math.min(reader.numDocs(), totalHitsThreshold), null, totalHitsThreshold);
    searcher.search(query, collector);
    TopDocs topDocs = collector.topDocs();
    if (topDocs.totalHits.value < totalHitsThreshold) {
      assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation);
      assertEquals(22, topDocs.totalHits.value);
    } else {
      assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
    }
    // All docs must have the same score
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
      assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
    }

    reader.close();
    w.close();
    dir.close();
  }
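
  /**
   * Checks {@link SynonymQuery#mergeImpacts} on two dummy {@link ImpactsEnum}s, with and without
   * per-clause boosts; boosts scale the frequencies of the merged impacts.
   */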
  public void testMergeImpacts() throws IOException {
    DummyImpactsEnum impacts1 = new DummyImpactsEnum();
    impacts1.reset(42,
        new Impact[][] {
          new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
          new Impact[] { new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
        },
        new int[] {
          110,
          945
        });
    DummyImpactsEnum impacts2 = new DummyImpactsEnum();
    impacts2.reset(45,
        new Impact[][] {
          new Impact[] { new Impact(2, 10), new Impact(6, 13) },
          new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
        },
        new int[] {
          90,
          1000
        });

    ImpactsSource mergedImpacts =
        SynonymQuery.mergeImpacts(new ImpactsEnum[] { impacts1, impacts2 }, new float[] { 1f, 1f });
    assertEquals(
        new Impact[][] {
          new Impact[] { new Impact(5, 10), new Impact(7, 12), new Impact(14, 13) },
          new Impact[] { new Impact(Integer.MAX_VALUE, 1) }
        },
        new int[] {
          90,
          1000
        },
        mergedImpacts.getImpacts());

    ImpactsSource mergedBoostedImpacts =
        SynonymQuery.mergeImpacts(new ImpactsEnum[] { impacts1, impacts2 }, new float[] { 0.3f, 0.9f });
    assertEquals(
        new Impact[][] {
          new Impact[] { new Impact(3, 10), new Impact(4, 12), new Impact(9, 13) },
          new Impact[] { new Impact(Integer.MAX_VALUE, 1) }
        },
        new int[] {
          90,
          1000
        },
        mergedBoostedImpacts.getImpacts());

    // docID is > the first docIdUpTo of impacts1
    impacts2.reset(112,
        new Impact[][] {
          new Impact[] { new Impact(2, 10), new Impact(6, 13) },
          new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
        },
        new int[] {
          150,
          1000
        });
    assertEquals(
        new Impact[][] {
          new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) }, // same as impacts1
          new Impact[] { new Impact(3, 9), new Impact(10, 11), new Impact(15, 13), new Impact(19, 14) }
        },
        new int[] {
          110,
          945
        },
        mergedImpacts.getImpacts());
    assertEquals(
        new Impact[][] {
          new Impact[] { new Impact(1, 10), new Impact(2, 12), new Impact(3, 13) }, // same as impacts1, with the boost applied
          new Impact[] { new Impact(3, 9), new Impact(7, 11), new Impact(10, 13), new Impact(11, 14) }
        },
        new int[] {
          110,
          945
        },
        mergedBoostedImpacts.getImpacts());
  }
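
  /** Asserts that {@code actual} exposes exactly the given per-level impacts and docIdUpTo values. */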
  private static void assertEquals(Impact[][] impacts, int[] docIdUpTo, Impacts actual) {
    assertEquals(impacts.length, actual.numLevels());
    for (int i = 0; i < impacts.length; ++i) {
      assertEquals(docIdUpTo[i], actual.getDocIdUpTo(i));
      assertEquals(Arrays.asList(impacts[i]), actual.getImpacts(i));
    }
  }
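
  /**
   * Minimal {@link ImpactsEnum} stub: it only reports a fixed docID and precomputed per-level
   * impacts; all iteration methods are unsupported.
   */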
  private static class DummyImpactsEnum extends ImpactsEnum {

    private int docID;
    private Impact[][] impacts;
    private int[] docIdUpTo;

    void reset(int docID, Impact[][] impacts, int[] docIdUpTo) {
      this.docID = docID;
      this.impacts = impacts;
      this.docIdUpTo = docIdUpTo;
    }

    @Override
    public void advanceShallow(int target) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public Impacts getImpacts() throws IOException {
      return new Impacts() {

        @Override
        public int numLevels() {
          return impacts.length;
        }

        @Override
        public int getDocIdUpTo(int level) {
          return docIdUpTo[level];
        }

        @Override
        public List<Impact> getImpacts(int level) {
          return Arrays.asList(impacts[level]);
        }
      };
    }

    @Override
    public int freq() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int nextPosition() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int startOffset() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int endOffset() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public BytesRef getPayload() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int docID() {
      return docID;
    }

    @Override
    public int nextDoc() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int advance(int target) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long cost() {
      throw new UnsupportedOperationException();
    }
  }
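
  /**
   * Indexes random documents and verifies that top hits collected with early termination
   * (TOP_SCORES) match an exhaustive (COMPLETE) collection, both for the bare {@link SynonymQuery}
   * and when it is combined with a FILTER clause.
   */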
  public void testRandomTopDocs() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    int numDocs = TEST_NIGHTLY ? atLeast(128 * 8 * 8 * 3) : atLeast(100); // at night, make sure some terms have skip data
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      int numValues = random().nextInt(1 << random().nextInt(5));
      int start = random().nextInt(10);
      for (int j = 0; j < numValues; ++j) {
        int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
        for (int k = 0; k < freq; ++k) {
          doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
        }
      }
      w.addDocument(doc);
    }
    IndexReader reader = DirectoryReader.open(w);
    w.close();
    IndexSearcher searcher = newSearcher(reader);

    for (int term1 = 0; term1 < 15; ++term1) {
      int term2;
      do {
        term2 = random().nextInt(15);
      } while (term1 == term2);
      float boost1 = random().nextBoolean() ? Math.max(random().nextFloat(), Float.MIN_NORMAL) : 1f;
      float boost2 = random().nextBoolean() ? Math.max(random().nextFloat(), Float.MIN_NORMAL) : 1f;
      Query query = new SynonymQuery.Builder("foo")
          .addTerm(new Term("foo", Integer.toString(term1)), boost1)
          .addTerm(new Term("foo", Integer.toString(term2)), boost2)
          .build();

      TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, Integer.MAX_VALUE); // COMPLETE
      TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, 1); // TOP_SCORES
      searcher.search(query, collector1);
      searcher.search(query, collector2);
      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);

      int filterTerm = random().nextInt(15);
      Query filteredQuery = new BooleanQuery.Builder()
          .add(query, Occur.MUST)
          .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
          .build();
      collector1 = TopScoreDocCollector.create(10, null, Integer.MAX_VALUE); // COMPLETE
      collector2 = TopScoreDocCollector.create(10, null, 1); // TOP_SCORES
      searcher.search(filteredQuery, collector1);
      searcher.search(filteredQuery, collector2);
      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
    }

    reader.close();
    dir.close();
  }
}