/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search.suggest.document;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CyclicBarrier;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene87.Lucene87Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
import static org.hamcrest.core.IsEqual.equalTo;
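
/** Tests indexing and querying of {@link SuggestField} through {@link SuggestIndexSearcher}. */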
public class TestSuggestField extends LuceneTestCase {
public Directory dir;
@Before
public void before() throws Exception {
dir = newDirectory();
}
@After
public void after() throws Exception {
dir.close();
}
@Test
public void testEmptySuggestion() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new SuggestField("suggest_field", "", 3);
});
assertTrue(expected.getMessage().contains("value"));
}
@Test
public void testNegativeWeight() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new SuggestField("suggest_field", "sugg", -1);
});
assertTrue(expected.getMessage().contains("weight"));
}
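// SuggestField must reject values containing the reserved separator, hole, and end-byte characters.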
@Test
public void testReservedChars() throws Exception {
CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
charsRefBuilder.append("sugg");
charsRefBuilder.setCharAt(2, (char) ConcatenateGraphFilter.SEP_LABEL);
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new SuggestField("name", charsRefBuilder.toString(), 1);
});
assertTrue(expected.getMessage().contains("[0x1f]"));
charsRefBuilder.setCharAt(2, (char) CompletionAnalyzer.HOLE_CHARACTER);
expected = expectThrows(IllegalArgumentException.class, () -> {
new SuggestField("name", charsRefBuilder.toString(), 1);
});
assertTrue(expected.getMessage().contains("[0x1e]"));
charsRefBuilder.setCharAt(2, (char) NRTSuggesterBuilder.END_BYTE);
expected = expectThrows(IllegalArgumentException.class, () -> {
new SuggestField("name", charsRefBuilder.toString(), 1);
});
assertTrue(expected.getMessage().contains("[0x0]"));
}
@Test
public void testEmpty() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
assertThat(lookupDocs.totalHits.value, equalTo(0L));
reader.close();
iw.close();
}
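// Builds the payload bytes a SuggestField's token stream is expected to carry (surface form plus type byte)
// and checks that the stream emits the surface form with that payload, with and without a CompletionAnalyzer.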
@Test
public void testTokenStream() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
SuggestField suggestField = new SuggestField("field", "input", 1);
BytesRef surfaceForm = new BytesRef("input");
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) {
output.writeVInt(surfaceForm.length);
output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
output.writeVInt(1 + 1);
output.writeByte(SuggestField.TYPE);
}
BytesRef payload = new BytesRef(byteArrayOutputStream.toByteArray());
TokenStream stream = new PayloadAttrToTypeAttrFilter(suggestField.tokenStream(analyzer, null));
assertTokenStreamContents(stream, new String[] {"input"}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null);
CompletionAnalyzer completionAnalyzer = new CompletionAnalyzer(analyzer);
stream = new PayloadAttrToTypeAttrFilter(suggestField.tokenStream(completionAnalyzer, null));
assertTokenStreamContents(stream, new String[] {"input"}, null, null, new String[]{payload.utf8ToString()}, new int[]{1}, null, null);
}
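// Many documents share the same suggestion text with random weights; hits must come back in descending weight order.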
@Test @Slow
public void testDupSuggestFieldValues() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
final int num = Math.min(1000, atLeast(100));
int[] weights = new int[num];
for(int i = 0; i < num; i++) {
Document document = new Document();
weights[i] = random().nextInt(Integer.MAX_VALUE);
document.add(new SuggestField("suggest_field", "abc", weights[i]));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
Entry[] expectedEntries = new Entry[num];
Arrays.sort(weights);
for (int i = 1; i <= num; i++) {
expectedEntries[i - 1] = new Entry("abc", weights[num - i]);
}
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc"));
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false);
assertSuggestions(lookupDocs, expectedEntries);
reader.close();
iw.close();
}
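// With duplicate-skipping enabled on the collector, only the best-weighted hit per unique suggestion text is returned.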
public void testDeduplication() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
final int num = TestUtil.nextInt(random(), 2, 20);
int[] weights = new int[num];
int bestABCWeight = Integer.MIN_VALUE;
int bestABDWeight = Integer.MIN_VALUE;
for(int i = 0; i < num; i++) {
Document document = new Document();
weights[i] = random().nextInt(Integer.MAX_VALUE);
String suggestValue;
boolean doABC;
if (i == 0) {
doABC = true;
} else if (i == 1) {
doABC = false;
} else {
doABC = random().nextBoolean();
}
if (doABC) {
suggestValue = "abc";
bestABCWeight = Math.max(bestABCWeight, weights[i]);
} else {
suggestValue = "abd";
bestABDWeight = Math.max(bestABDWeight, weights[i]);
}
document.add(new SuggestField("suggest_field", suggestValue, weights[i]));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
Entry[] expectedEntries = new Entry[2];
if (bestABDWeight > bestABCWeight) {
expectedEntries[0] = new Entry("abd", bestABDWeight);
expectedEntries[1] = new Entry("abc", bestABCWeight);
} else {
expectedEntries[0] = new Entry("abc", bestABCWeight);
expectedEntries[1] = new Entry("abd", bestABDWeight);
}
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
suggestIndexSearcher.suggest(query, collector);
TopSuggestDocs lookupDocs = collector.get();
assertSuggestions(lookupDocs, expectedEntries);
reader.close();
iw.close();
}
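// Many duplicates of "abc" plus a single low-weight "abd"; the deduplicating collector must still surface both keys.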
@Slow
public void testExtremeDeduplication() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
final int num = atLeast(500);
int bestWeight = Integer.MIN_VALUE;
for(int i = 0; i < num; i++) {
Document document = new Document();
int weight = TestUtil.nextInt(random(), 10, 100);
bestWeight = Math.max(weight, bestWeight);
document.add(new SuggestField("suggest_field", "abc", weight));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
}
Document document = new Document();
document.add(new SuggestField("suggest_field", "abd", 7));
iw.addDocument(document);
if (random().nextBoolean()) {
iw.forceMerge(1);
}
DirectoryReader reader = iw.getReader();
Entry[] expectedEntries = new Entry[2];
expectedEntries[0] = new Entry("abc", bestWeight);
expectedEntries[1] = new Entry("abd", 7);
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
suggestIndexSearcher.suggest(query, collector);
TopSuggestDocs lookupDocs = collector.get();
assertSuggestions(lookupDocs, expectedEntries);
reader.close();
iw.close();
}
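// Despite its name, numDigits is the alphabet size: characters are drawn from 'a' to ('a' + numDigits - 1).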
private static String randomSimpleString(int numDigits, int maxLen) {
final int len = TestUtil.nextInt(random(), 1, maxLen);
final char[] chars = new char[len];
for(int j=0;j<len;j++) {
chars[j] = (char) ('a' + random().nextInt(numDigits));
}
return new String(chars);
}
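// Cross-checks the suggester against a brute-force ranking (score descending, then key, then doc id), optionally deduplicated.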
public void testRandom() throws Exception {
int numDigits = TestUtil.nextInt(random(), 1, 6);
Set<String> keys = new HashSet<>();
int keyCount = TestUtil.nextInt(random(), 1, 20);
if (numDigits == 1) {
keyCount = Math.min(9, keyCount);
}
while (keys.size() < keyCount) {
keys.add(randomSimpleString(numDigits, 10));
}
List<String> keysList = new ArrayList<>(keys);
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");
// we rely on docID order:
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
int docCount = TestUtil.nextInt(random(), 1, 200);
Entry[] docs = new Entry[docCount];
for(int i=0;i<docCount;i++) {
int weight = random().nextInt(40);
String key = keysList.get(random().nextInt(keyCount));
//System.out.println("KEY: " + key);
docs[i] = new Entry(key, null, weight, i);
Document doc = new Document();
doc.add(new SuggestField("suggest_field", key, weight));
iw.addDocument(doc);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
int iters = atLeast(200);
for(int iter=0;iter<iters;iter++) {
String prefix = randomSimpleString(numDigits, 2);
if (VERBOSE) {
System.out.println("\nTEST: prefix=" + prefix);
}
// slow but hopefully correct suggester:
List<Entry> expected = new ArrayList<>();
for(Entry doc : docs) {
if (doc.output.startsWith(prefix)) {
expected.add(doc);
}
}
Collections.sort(expected,
(a, b) -> {
// sort by higher score:
int cmp = Float.compare(b.value, a.value);
if (cmp == 0) {
// tie break by completion key
cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.output, b.output);
if (cmp == 0) {
// prefer smaller doc id, in case of a tie
cmp = Integer.compare(a.id, b.id);
}
}
return cmp;
});
boolean dedup = random().nextBoolean();
if (dedup) {
List<Entry> deduped = new ArrayList<>();
Set<String> seen = new HashSet<>();
for(Entry entry : expected) {
if (seen.contains(entry.output) == false) {
seen.add(entry.output);
deduped.add(entry);
}
}
expected = deduped;
}
// TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
//int topN = TestUtil.nextInt(random(), 1, docCount+10);
int topN = docCount;
if (VERBOSE) {
if (dedup) {
System.out.println(" expected (dedup'd) topN=" + topN + ":");
} else {
System.out.println(" expected topN=" + topN + ":");
}
for(int i=0;i<expected.size();i++) {
if (i >= topN) {
System.out.println(" leftover: " + i + ": " + expected.get(i));
} else {
System.out.println(" " + i + ": " + expected.get(i));
}
}
}
expected = expected.subList(0, Math.min(topN, expected.size()));
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
searcher.suggest(query, collector);
TopSuggestDocs actual = collector.get();
if (VERBOSE) {
System.out.println(" actual:");
SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
for(int i=0;i<suggestScoreDocs.length;i++) {
System.out.println(" " + i + ": " + suggestScoreDocs[i]);
}
}
assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
}
reader.close();
iw.close();
}
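// Documents deleted through the writer must not show up in suggestions from a near-real-time reader.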
@Test
public void testNRTDeletedDocFiltering() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
// using IndexWriter instead of RandomIndexWriter
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
int numLive = 0;
List<Entry> expectedEntries = new ArrayList<>();
for (int i = 0; i < num; i++) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, num - i));
if (i % 2 == 0) {
document.add(newStringField("str_field", "delete", Field.Store.YES));
} else {
numLive++;
expectedEntries.add(new Entry("abc_" + i, num - i));
document.add(newStringField("str_field", "no_delete", Field.Store.YES));
}
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
iw.deleteDocuments(new Term("str_field", "delete"));
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
reader.close();
iw.close();
}
@Test
public void testSuggestOnAllFilteredDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
for (int i = 0; i < num; i++) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, i));
document.add(newStringField("str_fld", "deleted", Field.Store.NO));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
BitsProducer filter = new BitsProducer() {
@Override
public Bits getBits(LeafReaderContext context) throws IOException {
return new Bits.MatchNoBits(context.reader().maxDoc());
}
};
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
// no random access required;
// calling suggest with a filter that matches no documents should terminate early
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertThat(suggest.totalHits.value, equalTo(0L));
reader.close();
iw.close();
}
@Test
public void testSuggestOnAllDeletedDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
// using IndexWriter instead of RandomIndexWriter
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
for (int i = 0; i < num; i++) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, i));
document.add(newStringField("delete", "delete", Field.Store.NO));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
iw.deleteDocuments(new Term("delete", "delete"));
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertThat(suggest.totalHits.value, equalTo(0L));
reader.close();
iw.close();
}
@Test
public void testSuggestOnMostlyDeletedDocuments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
// using IndexWriter instead of RandomIndexWriter
IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
for (int i = 1; i <= num; i++) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, i));
document.add(new StoredField("weight_fld", i));
document.add(new IntPoint("weight_fld", i));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
iw.deleteDocuments(IntPoint.newRangeQuery("weight_fld", 2, Integer.MAX_VALUE));
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_1", 1));
reader.close();
iw.close();
}
@Test
public void testMultipleSuggestFieldsPerDoc() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "sug_field_1", "sug_field_2"));
Document document = new Document();
document.add(new SuggestField("sug_field_1", "apple", 4));
document.add(new SuggestField("sug_field_2", "april", 3));
iw.addDocument(document);
document = new Document();
document.add(new SuggestField("sug_field_1", "aples", 3));
document.add(new SuggestField("sug_field_2", "apartment", 2));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap"));
TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3));
query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap"));
TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2));
// check that the doc ids are consistent
for (int i = 0; i < suggestDocs1.scoreDocs.length; i++) {
ScoreDoc suggestScoreDoc = suggestDocs1.scoreDocs[i];
assertThat(suggestScoreDoc.doc, equalTo(suggestDocs2.scoreDocs[i].doc));
}
reader.close();
iw.close();
}
@Test
public void testEarlyTermination() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
// create segments of 4 documents with descending suggestion weights;
// suggest should terminate early on segments whose docs have lower suggestion weights
for (int i = num; i > 0; i--) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, i));
iw.addDocument(document);
if (i % 4 == 0) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_" + num, num));
reader.close();
iw.close();
}
@Test
public void testMultipleSegments() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
List<Entry> entries = new ArrayList<>();
// ensure at least some segments have no suggest field
for (int i = num; i > 0; i--) {
Document document = new Document();
if (random().nextInt(4) == 1) {
document.add(new SuggestField("suggest_field", "abc_" + i, i));
entries.add(new Entry("abc_" + i, i));
}
document.add(new StoredField("weight_fld", i));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size(), false);
assertSuggestions(suggest, entries.toArray(new Entry[entries.size()]));
reader.close();
iw.close();
}
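// The doc id of each suggestion must resolve to the document that indexed it, verified via the stored int_field.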
@Test
public void testReturnedDocID() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(10));
for (int i = 0; i < num; i++) {
Document document = new Document();
document.add(new SuggestField("suggest_field", "abc_" + i, num));
document.add(new StoredField("int_field", i));
iw.addDocument(document);
if (random().nextBoolean()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertEquals(num, suggest.totalHits.value);
for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
String key = suggestScoreDoc.key.toString();
assertTrue(key.startsWith("abc_"));
String substring = key.substring(4);
int fieldValue = Integer.parseInt(substring);
Document doc = reader.document(suggestScoreDoc.doc);
assertEquals(doc.getField("int_field").numericValue().intValue(), fieldValue);
}
reader.close();
iw.close();
}
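// Suggestion scores equal the indexed weights and must be non-increasing within each prefix.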
@Test
public void testScoring() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
int num = Math.min(1000, atLeast(50));
String[] prefixes = {"abc", "bac", "cab"};
Map<String, Integer> mappings = new HashMap<>();
for (int i = 0; i < num; i++) {
Document document = new Document();
String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" + i;
int weight = random().nextInt(Integer.MAX_VALUE);
document.add(new SuggestField("suggest_field", suggest, weight));
mappings.put(suggest, weight);
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
for (String prefix : prefixes) {
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertTrue(suggest.totalHits.value > 0);
float topScore = -1;
for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
if (topScore != -1) {
assertTrue(topScore >= scoreDoc.score);
}
topScore = scoreDoc.score;
assertThat((float) mappings.get(scoreDoc.key.toString()), equalTo(scoreDoc.score));
assertNotNull(mappings.remove(scoreDoc.key.toString()));
}
}
assertThat(mappings.size(), equalTo(0));
reader.close();
iw.close();
}
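// Indexes titles from LineFileDocs as suggestions; querying each title must return a hit scored with the best weight recorded for it.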
@Test
public void testRealisticKeys() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
LineFileDocs lineFileDocs = new LineFileDocs(random());
int num = Math.min(1000, atLeast(50));
Map<String, Integer> mappings = new HashMap<>();
for (int i = 0; i < num; i++) {
Document document = lineFileDocs.nextDoc();
String title = document.getField("title").stringValue();
int maxLen = Math.min(title.length(), 500);
String prefix = title.substring(0, maxLen);
int weight = random().nextInt(Integer.MAX_VALUE);
Integer prevWeight = mappings.get(prefix);
if (prevWeight == null || prevWeight < weight) {
mappings.put(prefix, weight);
}
Document doc = new Document();
doc.add(new SuggestField("suggest_field", prefix, weight));
iw.addDocument(doc);
if (rarely()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
for (Map.Entry<String, Integer> entry : mappings.entrySet()) {
String title = entry.getKey();
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size(), false);
assertTrue(suggest.totalHits.value > 0);
boolean matched = false;
for (ScoreDoc scoreDoc : suggest.scoreDocs) {
matched = Float.compare(scoreDoc.score, (float) entry.getValue()) == 0;
if (matched) {
break;
}
}
assertTrue("at least one of the entries should have the score", matched);
}
lineFileDocs.close();
reader.close();
iw.close();
}
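// Several threads query one shared SuggestIndexSearcher concurrently; all must see the full, correctly ordered suggestions.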
@Test
public void testThreads() throws Exception {
final Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field_1", "suggest_field_2", "suggest_field_3"));
int num = Math.min(1000, atLeast(100));
final String prefix1 = "abc1_";
final String prefix2 = "abc2_";
final String prefix3 = "abc3_";
final Entry[] entries1 = new Entry[num];
final Entry[] entries2 = new Entry[num];
final Entry[] entries3 = new Entry[num];
for (int i = 0; i < num; i++) {
int weight = num - (i + 1);
entries1[i] = new Entry(prefix1 + weight, weight);
entries2[i] = new Entry(prefix2 + weight, weight);
entries3[i] = new Entry(prefix3 + weight, weight);
}
for (int i = 0; i < num; i++) {
Document doc = new Document();
doc.add(new SuggestField("suggest_field_1", prefix1 + i, i));
doc.add(new SuggestField("suggest_field_2", prefix2 + i, i));
doc.add(new SuggestField("suggest_field_3", prefix3 + i, i));
iw.addDocument(doc);
if (rarely()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread[] threads = new Thread[numThreads];
final CyclicBarrier startingGun = new CyclicBarrier(numThreads+1);
final CopyOnWriteArrayList<Throwable> errors = new CopyOnWriteArrayList<>();
final SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1));
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries1);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2));
suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries2);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3));
suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries3);
} catch (Throwable e) {
errors.add(e);
}
}
};
threads[i].start();
}
startingGun.await();
for (Thread t : threads) {
t.join();
}
assertTrue(errors.toString(), errors.isEmpty());
reader.close();
iw.close();
}
static class Entry {
final String output;
final float value;
final String context;
final int id;
Entry(String output, float value) {
this(output, null, value);
}
Entry(String output, String context, float value) {
this(output, context, value, -1);
}
Entry(String output, String context, float value, int id) {
this.output = output;
this.value = value;
this.context = context;
this.id = id;
}
@Override
public String toString() {
return "key=" + output + " score=" + value + " context=" + context + " id=" + id;
}
}
static void assertSuggestions(TopDocs actual, Entry... expected) {
SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
for (int i = 0; i < Math.min(expected.length, suggestScoreDocs.length); i++) {
SuggestScoreDoc lookupDoc = suggestScoreDocs[i];
String msg = "Hit " + i + ": expected: " + toString(expected[i]) + " but actual: " + toString(lookupDoc);
assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output));
assertThat(msg, lookupDoc.score, equalTo(expected[i].value));
assertThat(msg, lookupDoc.context, equalTo(expected[i].context));
}
assertThat(suggestScoreDocs.length, equalTo(expected.length));
}
private static String toString(Entry expected) {
return "key:" + expected.output + " score:" + expected.value + " context:" + expected.context;
}
private static String toString(SuggestScoreDoc actual) {
return "key:" + actual.key.toString() + " score:" + actual.score + " context:" + actual.context;
}
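// Creates an IndexWriterConfig whose codec routes the given suggest fields to the completion postings format
// with a randomly chosen FST load mode.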
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, String... suggestFields) {
return iwcWithSuggestField(analyzer, asSet(suggestFields));
}
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
iwc.setMergePolicy(newLogMergePolicy());
Codec filterCodec = new Lucene87Codec() {
CompletionPostingsFormat.FSTLoadMode fstLoadMode =
RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (suggestFields.contains(field)) {
return postingsFormat;
}
return super.getPostingsFormatForField(field);
}
};
iwc.setCodec(filterCodec);
return iwc;
}
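// Copies each token's payload into its type attribute so assertTokenStreamContents can inspect the payload.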
public final static class PayloadAttrToTypeAttrFilter extends TokenFilter {
private PayloadAttribute payload = addAttribute(PayloadAttribute.class);
private TypeAttribute type = addAttribute(TypeAttribute.class);
protected PayloadAttrToTypeAttrFilter(TokenStream input) {
super(input);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
// we move them over so we can assert them more easily in the tests
type.setType(payload.getPayload().utf8ToString());
return true;
}
return false;
}
}
}