blob: 4339f991ffacaa5d1a6042b1c57bdf6616a7958b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.search.suggest.document.TestSuggestField.Entry;
import static org.apache.lucene.search.suggest.document.TestSuggestField.assertSuggestions;
import static org.apache.lucene.search.suggest.document.TestSuggestField.iwcWithSuggestField;
import static org.hamcrest.core.IsEqual.equalTo;
public class TestPrefixCompletionQuery extends LuceneTestCase {
private static class NumericRangeBitsProducer extends BitsProducer {
private final String field;
private final long min, max;
public NumericRangeBitsProducer(String field, long min, long max) {
this.field = field;
this.min = min;
this.max = max;
}
@Override
public String toString() {
return field + "[" + min + ".." + max + "]";
}
@Override
public boolean equals(Object obj) {
if (obj == null || getClass() != obj.getClass()) {
return false;
}
NumericRangeBitsProducer that = (NumericRangeBitsProducer) obj;
return field.equals(that.field)
&& min == that.min
&& max == that.max;
}
@Override
public int hashCode() {
return Objects.hash(getClass(), field, min, max);
}
@Override
public Bits getBits(final LeafReaderContext context) throws IOException {
final int maxDoc = context.reader().maxDoc();
FixedBitSet bits = new FixedBitSet(maxDoc);
final SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
int docID;
while ((docID = values.nextDoc()) != NO_MORE_DOCS) {
final int count = values.docValueCount();
for (int i = 0; i < count; ++i) {
final long v = values.nextValue();
if (v >= min && v <= max) {
bits.set(docID);
break;
}
}
}
return bits;
}
}
public Directory dir;
@Before
public void before() throws Exception {
dir = newDirectory();
}
@After
public void after() throws Exception {
dir.close();
}
public void testSimple() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc", 3));
document.add(new SuggestField("suggest_field", "abd", 4));
document.add(new SuggestField("suggest_field", "The Foo Fighters", 2));
iw.addDocument(document);
document = new Document();
document.add(new SuggestField("suggest_field", "abcdd", 5));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3));
reader.close();
iw.close();
}
@Test
public void testEmptyPrefixQuery() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
Document document = new Document();
document.add(new SuggestField("suggest_field", "suggestion1", 1));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", ""));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertEquals(0, suggest.scoreDocs.length);
reader.close();
iw.close();
}
public void testMostlyFilteredOutDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
for (int i = 0; i < num; i++) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, i));
document.add(new NumericDocValuesField("filter_int_fld", i));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
int topScore = num/2;
BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 0, topScore);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if at most half of the top scoring documents have been filtered out
// the search should be admissible for a single segment
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertTrue(suggest.totalHits.value >= 1);
assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));
filter = new NumericRangeBitsProducer("filter_int_fld", 0, 0);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if more than half of the top scoring documents have been filtered out
// search is not admissible, so # of suggestions requested is num instead of 1
suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, new Entry("abc_0", 0));
filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if only lower scoring documents are filtered out
// search is admissible
suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1));
reader.close();
iw.close();
}
public void testDocFiltering() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
Document document = new Document();
document.add(new NumericDocValuesField("filter_int_fld", 9));
document.add(new SuggestField("suggest_field", "apples", 3));
iw.addDocument(document);
document = new Document();
document.add(new NumericDocValuesField("filter_int_fld", 10));
document.add(new SuggestField("suggest_field", "applle", 4));
iw.addDocument(document);
document = new Document();
document.add(new NumericDocValuesField("filter_int_fld", 4));
document.add(new SuggestField("suggest_field", "apple", 5));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
// suggest without filter
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 3, false);
assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3));
// suggest with filter
BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter);
suggest = indexSearcher.suggest(query, 3, false);
assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3));
reader.close();
iw.close();
}
public void testAnalyzerDefaults() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer);
final String field = getTestName();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(completionAnalyzer, field));
Document document = new Document();
document.add(new SuggestField(field, "foobar", 7));
document.add(new SuggestField(field, "foo bar", 8));
document.add(new SuggestField(field, "the fo", 9));
document.add(new SuggestField(field, "the foo bar", 10));
document.add(new SuggestField(field, "foo the bar", 11)); // middle stopword
document.add(new SuggestField(field, "baz the", 12)); // trailing stopword
iw.addDocument(document);
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "fo"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 9, false); //matches all with "fo*"
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("foo bar", 8), new Entry("foobar", 7));
// with leading stopword
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "the fo")); // becomes "_ fo*"
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9));
// with middle stopword
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "foo the bar")); // becomes "foo _ bar*"
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("foo the bar", 11));
// no space
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "foob"));
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("foobar", 7));
// surrounding stopwords
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "the baz the")); // becomes "_ baz _"
suggest = indexSearcher.suggest(query, 4, false);
assertSuggestions(suggest);
reader.close();
iw.close();
}
public void testAnalyzerWithoutSeparator() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
//note: when we don't preserve separators, the choice of preservePosInc is irrelevant
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, false, random().nextBoolean());
final String field = getTestName();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(completionAnalyzer, field));
Document document = new Document();
document.add(new SuggestField(field, "foobar", 7));
document.add(new SuggestField(field, "foo bar", 8));
document.add(new SuggestField(field, "the fo", 9));
document.add(new SuggestField(field, "the foo bar", 10));
document.add(new SuggestField(field, "foo the bar", 11)); // middle stopword
document.add(new SuggestField(field, "baz the", 12)); // trailing stopword
iw.addDocument(document);
// note we use the completionAnalyzer with the queries (instead of input analyzer) because of non-default settings
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "fo"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 9, false); //matches all with fo
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
// with leading stopword
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "the fo")); // becomes "fo*"
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
// with middle stopword
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "foo the bar")); // becomes "foobar*"
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
// no space
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "foob"));
suggest = indexSearcher.suggest(query, 9, false); // no separators, thus match several
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
// surrounding stopwords
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "the baz the")); // becomes "baz*"
suggest = indexSearcher.suggest(query, 4, false);// stopwords in query get removed so we match
assertSuggestions(suggest, new Entry("baz the", 12));
reader.close();
iw.close();
}
public void testAnalyzerNoPreservePosInc() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer, true, false);
final String field = getTestName();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(completionAnalyzer, field));
Document document = new Document();
document.add(new SuggestField(field, "foobar", 7));
document.add(new SuggestField(field, "foo bar", 8));
document.add(new SuggestField(field, "the fo", 9));
document.add(new SuggestField(field, "the foo bar", 10));
document.add(new SuggestField(field, "foo the bar", 11)); // middle stopword
document.add(new SuggestField(field, "baz the", 12)); // trailing stopword
iw.addDocument(document);
// note we use the completionAnalyzer with the queries (instead of input analyzer) because of non-default settings
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "fo"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 9, false); //matches all with fo
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
// with leading stopword
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "the fo")); // becomes "fo*"
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
// with middle stopword
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "foo the bar")); // becomes "foo bar*"
suggest = indexSearcher.suggest(query, 9, false);
assertSuggestions(suggest, new Entry("foo the bar", 11), new Entry("the foo bar", 10), new Entry("foo bar", 8)); // no foobar
// no space
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "foob"));
suggest = indexSearcher.suggest(query, 4, false); // separators, thus only match "foobar"
assertSuggestions(suggest, new Entry("foobar", 7));
// surrounding stopwords
query = new PrefixCompletionQuery(completionAnalyzer, new Term(field, "the baz the")); // becomes "baz*"
suggest = indexSearcher.suggest(query, 4, false);// stopwords in query get removed so we match
assertSuggestions(suggest, new Entry("baz the", 12));
reader.close();
iw.close();
}
public void testGhostField() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field", "suggest_field2", "suggest_field3"));
Document document = new Document();
document.add(new StringField("id", "0", Field.Store.NO));
document.add(new SuggestField("suggest_field", "apples", 3));
iw.addDocument(document);
// need another document so whole segment isn't deleted
iw.addDocument(new Document());
iw.commit();
document = new Document();
document.add(new StringField("id", "1", Field.Store.NO));
document.add(new SuggestField("suggest_field2", "apples", 3));
iw.addDocument(document);
iw.commit();
iw.deleteDocuments(new Term("id", "0"));
// first force merge is OK
iw.forceMerge(1);
// second force merge causes MultiFields to include "suggest_field" in its iteration, yet a null Terms is returned (no documents have
// this field anymore)
iw.addDocument(new Document());
iw.forceMerge(1);
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
assertEquals(0, indexSearcher.suggest(query, 3, false).totalHits.value);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
assertSuggestions(indexSearcher.suggest(query, 3, false), new Entry("apples", 3));
reader.close();
iw.close();
}
public void testEmptyPrefixContextQuery() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
Document document = new Document();
document.add(new ContextSuggestField("suggest_field", "suggestion", 1, "type"));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "")));
query.addContext("type", 1);
// Ensure that context queries optimize an empty prefix to a fully empty automaton.
CompletionWeight weight = (CompletionWeight) query.createWeight(
suggestIndexSearcher, ScoreMode.COMPLETE, 1.0F);
assertEquals(0, weight.getAutomaton().getNumStates());
// Check that there are no suggestions.
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertEquals(0, suggest.scoreDocs.length);
reader.close();
iw.close();
}
}