blob: bf1085d3bf827b78b8843a404a7fec8ac01d3841 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest.analyzing;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
/**
 * Builds a suggester over two weighted inputs and checks the results of several lookups
 * ("ear", "ear ", "pen", "p", "money penny", "penny ea"): result count, ordering, key,
 * highlighted key, weight and payload. Finally compares the explicit-argument lookups
 * against suggesters whose constructor carries the same settings as defaults.
 */
public void testBasic() throws Exception {
Input keys[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
// "ear" is expected to hit both inputs; the weight-10 entry is expected first.
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals("foobaz", results.get(0).payload.utf8ToString());
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey);
assertEquals(8, results.get(1).value);
assertEquals(new BytesRef("foobar"), results.get(1).payload);
// With a trailing space only the input containing the whole token "ear" is expected.
results = suggester.lookup(TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
// "pen" is expected to be highlighted at both occurrences of "penny".
results = suggester.lookup(TestUtil.stringToCharSequence("pen", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
// A single-character query, shorter than the 3 passed to the constructor above.
results = suggester.lookup(TestUtil.stringToCharSequence("p", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
// Third lookup argument is false here: "money" appears in no input, yet one hit is expected.
results = suggester.lookup(TestUtil.stringToCharSequence("money penny", random()), 10, false, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>penny</b> saved is a <b>penny</b> earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
// Still with the third argument false: both inputs are expected for "penny ea".
results = suggester.lookup(TestUtil.stringToCharSequence("penny ea", random()), 10, false, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>penny</b> saved is a <b>penny</b> <b>ea</b>rned", results.get(0).highlightKey);
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your <b>ea</b>r", results.get(1).highlightKey);
// Fourth lookup argument false: no highlightKey is expected on the result.
results = suggester.lookup(TestUtil.stringToCharSequence("money penny", random()), 10, false, false);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertNull(results.get(0).highlightKey);
// Cross-check every allTermsRequired/highlight combination against constructor defaults.
testConstructorDefaults(suggester, keys, a, true, true);
testConstructorDefaults(suggester, keys, a, true, false);
testConstructorDefaults(suggester, keys, a, false, false);
testConstructorDefaults(suggester, keys, a, false, true);
suggester.close();
a.close();
}
/**
 * Checks that a suggester constructed with {@code allTermsRequired} and {@code highlight}
 * as constructor arguments answers the three-argument {@code lookup} the same way the
 * given suggester answers a lookup that passes both flags explicitly.
 *
 * @param suggester already-built suggester queried with explicit flags
 * @param keys inputs used to build the second suggester
 * @param a analyzer used for both indexing and querying in the second suggester
 * @param allTermsRequired flag passed to the constructor and to the explicit lookup
 * @param highlight flag passed to the constructor and to the explicit lookup
 * @throws IOException if building, looking up or closing the second suggester fails
 */
private void testConstructorDefaults(AnalyzingInfixSuggester suggester, Input[] keys, Analyzer a,
    boolean allTermsRequired, boolean highlight) throws IOException {
  // try-with-resources: the second suggester is closed even when an assertion or a
  // lookup throws, so a failure here does not leave it open.
  try (AnalyzingInfixSuggester suggester2 =
      new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false, allTermsRequired, highlight)) {
    suggester2.build(new InputArrayIterator(keys));

    CharSequence key = TestUtil.stringToCharSequence("penny ea", random());

    List<LookupResult> results1 = suggester.lookup(key, 10, allTermsRequired, highlight);
    List<LookupResult> results2 = suggester2.lookup(key, false, 10);
    assertEquals(results1.size(), results2.size());
    // Only the top hit is compared in detail.
    assertEquals(results1.get(0).key, results2.get(0).key);
    assertEquals(results1.get(0).highlightKey, results2.get(0).highlightKey);
  }
}
/**
 * Builds a suggester in a temporary directory, closes it, then opens a new suggester on
 * the same path without rebuilding and checks that lookups and {@code getCount()} still
 * report the two original entries.
 */
public void testAfterLoad() throws Exception {
Input keys[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
Path tempDir = createTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
assertEquals(2, suggester.getCount());
suggester.close();
// Reopen on the same path; build() is deliberately not called again.
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
assertEquals(2, suggester.getCount());
suggester.close();
a.close();
}
/** One piece of a highlighted suggestion, returned in place of a plain string; see {@link
 * LookupResult#highlightKey} */
private static final class LookupHighlightFragment {
  /** The slice of suggestion text this fragment covers. */
  public final String text;

  /** Whether this slice matched part of the user's query. */
  public final boolean isHit;

  /**
   * Sole constructor.
   *
   * @param text slice of suggestion text
   * @param isHit true when the slice matched the query
   */
  public LookupHighlightFragment(String text, boolean isHit) {
    this.isHit = isHit;
    this.text = text;
  }

  /** Renders both fields for debugging. */
  @Override
  public String toString() {
    StringBuilder description = new StringBuilder("LookupHighlightFragment(text=");
    description.append(text);
    description.append(" isHit=");
    description.append(isHit);
    description.append(")");
    return description.toString();
  }
}
/**
 * Overrides {@code highlight} in an anonymous suggester subclass so that it returns a
 * list of {@link LookupHighlightFragment} instead of a string, then checks that the
 * fragments returned through {@code highlightKey} render to the expected markup.
 */
@SuppressWarnings("unchecked")
public void testHighlightAsObject() throws Exception {
Input keys[] = new Input[] {
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false) {
@Override
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
ts.reset();
List<LookupHighlightFragment> fragments = new ArrayList<>();
// upto: offset in text up to which fragments have already been emitted.
int upto = 0;
while (ts.incrementToken()) {
String token = termAtt.toString();
int startOffset = offsetAtt.startOffset();
int endOffset = offsetAtt.endOffset();
if (upto < startOffset) {
// Emit the text between the previous fragment and this token as a non-hit.
fragments.add(new LookupHighlightFragment(text.substring(upto, startOffset), false));
upto = startOffset;
} else if (upto > startOffset) {
// Token starts before text that was already emitted; skip it.
continue;
}
if (matchedTokens.contains(token)) {
// Token matches.
fragments.add(new LookupHighlightFragment(text.substring(startOffset, endOffset), true));
upto = endOffset;
} else if (prefixToken != null && token.startsWith(prefixToken)) {
// Prefix match: the leading prefixToken.length() characters are the hit, the rest is not.
fragments.add(new LookupHighlightFragment(text.substring(startOffset, startOffset+prefixToken.length()), true));
if (prefixToken.length() < token.length()) {
fragments.add(new LookupHighlightFragment(text.substring(startOffset+prefixToken.length(), startOffset+token.length()), false));
}
upto = endOffset;
}
}
ts.end();
// After end(), emit whatever text remains past the last emitted fragment.
int endOffset = offsetAtt.endOffset();
if (upto < endOffset) {
fragments.add(new LookupHighlightFragment(text.substring(upto), false));
}
return fragments;
}
}
};
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(1, results.size());
// highlightKey carries the fragment list produced by the override above.
assertEquals("a penny saved is a penny <b>ear</b>ned", toString((List<LookupHighlightFragment>) results.get(0).highlightKey));
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
suggester.close();
a.close();
}
/**
 * Concatenates the fragments' text in order, wrapping each hit fragment in
 * {@code <b>...</b>}.
 *
 * @param fragments highlight fragments in display order
 * @return the rendered string
 */
public String toString(List<LookupHighlightFragment> fragments) {
  StringBuilder rendered = new StringBuilder();
  for (LookupHighlightFragment piece : fragments) {
    if (piece.isHit) {
      rendered.append("<b>").append(piece.text).append("</b>");
    } else {
      rendered.append(piece.text);
    }
  }
  return rendered.toString();
}
/**
 * Repeats the basic lookups with a randomly chosen fourth constructor argument
 * ({@code random().nextInt(10)}), with and without highlighting, and runs the whole set
 * a second time against a suggester reopened on the same directory.
 */
public void testRandomMinPrefixLength() throws Exception {
Input keys[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
Path tempDir = createTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
int minPrefixLength = random().nextInt(10);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, minPrefixLength, false);
suggester.build(new InputArrayIterator(keys));
// i: pass 0 uses the freshly built suggester, pass 1 the reopened one.
for(int i=0;i<2;i++) {
// j: 0 = highlighting on, 1 = highlighting off.
for(int j=0;j<2;j++) {
boolean doHighlight = j == 0;
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, doHighlight);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
if (doHighlight) {
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
}
assertEquals(10, results.get(0).value);
assertEquals("lend me your ear", results.get(1).key);
if (doHighlight) {
assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey);
}
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
assertEquals(8, results.get(1).value);
assertEquals(new BytesRef("foobar"), results.get(1).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("ear ", random()), 10, true, doHighlight);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
if (doHighlight) {
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
}
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("pen", random()), 10, true, doHighlight);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
if (doHighlight) {
assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).highlightKey);
}
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("p", random()), 10, true, doHighlight);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
if (doHighlight) {
assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).highlightKey);
}
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
}
// Make sure things still work after close and reopen:
suggester.close();
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, minPrefixLength, false);
}
// Closes the instance reopened at the end of the last pass.
suggester.close();
a.close();
}
/**
 * Looks up the prefix "penn" against a single input and checks that both occurrences of
 * "penny" are highlighted on the matching prefix only.
 */
public void testHighlight() throws Exception {
  Input[] inputs = {
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
  };
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, false);
  suggester.build(new InputArrayIterator(inputs));

  CharSequence query = TestUtil.stringToCharSequence("penn", random());
  List<LookupResult> hits = suggester.lookup(query, 10, true, true);

  assertEquals(1, hits.size());
  assertEquals("a penny saved is a penny earned", hits.get(0).key);
  assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", hits.get(0).highlightKey);

  suggester.close();
  analyzer.close();
}
/**
 * Checks that highlighting keeps the original surface casing ("Penny" vs "penny") when
 * the MockAnalyzer is created with {@code true} as its third argument, first with the
 * stock suggester and then with {@code addPrefixMatch} overridden to highlight the whole
 * surface token.
 */
public void testHighlightCaseChange() throws Exception {
Input keys[] = new Input[] {
new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a Penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", results.get(0).highlightKey);
suggester.close();
// Try again, but overriding addPrefixMatch to highlight
// the entire hit:
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false) {
@Override
protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
sb.append("<b>");
sb.append(surface);
sb.append("</b>");
}
};
suggester.build(new InputArrayIterator(keys));
results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a Penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>Penny</b> saved is a <b>penny</b> earned", results.get(0).highlightKey);
suggester.close();
a.close();
}
/** Builds a suggester and closes it twice; the second close must not throw. */
public void testDoubleClose() throws Exception {
  Input[] inputs = {
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
  };
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, false);
  suggester.build(new InputArrayIterator(inputs));

  // Close the same instance two times in a row.
  for (int attempt = 0; attempt < 2; attempt++) {
    suggester.close();
  }
  analyzer.close();
}
/**
 * Indexes with a {@link StopFilter} and queries with a {@link SuggestStopFilter}, both
 * using the stop set {"a"}, and checks that the query "a" still finds and highlights the
 * "a" prefix of "apples" while the leading stop word "a" in the input stays unhighlighted.
 */
public void testSuggestStopFilter() throws Exception {
  final CharArraySet stopWords = StopFilter.makeStopSet("a");

  Analyzer indexAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokens = new MockTokenizer();
      return new TokenStreamComponents(tokens,
          new StopFilter(tokens, stopWords));
    }
  };

  Analyzer queryAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokens = new MockTokenizer();
      return new TokenStreamComponents(tokens,
          new SuggestStopFilter(tokens, stopWords));
    }
  };

  AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), indexAnalyzer, queryAnalyzer, 3, false);

  Input keys[] = new Input[] {
    new Input("a bob for apples", 10, new BytesRef("foobaz")),
  };

  suggester.build(new InputArrayIterator(keys));
  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("a", random()), 10, true, true);
  assertEquals(1, results.size());
  assertEquals("a bob for apples", results.get(0).key);
  assertEquals("a bob for <b>a</b>pples", results.get(0).highlightKey);

  // A single IOUtils.close releases the suggester and both analyzers; the separate
  // suggester.close() that used to precede it closed the same instance twice.
  IOUtils.close(suggester, indexAnalyzer, queryAnalyzer);
}
/**
 * Builds a suggester from an empty input array, adds two entries through {@code add},
 * calls {@code refresh}, and then checks the same lookups as the basic test.
 */
public void testEmptyAtStart() throws Exception {
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
suggester.build(new InputArrayIterator(new Input[0]));
suggester.add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));
suggester.add(new BytesRef("lend me your ear"), null, 8, new BytesRef("foobar"));
// refresh() is called before the first lookup so the two added entries are searchable.
suggester.refresh();
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey);
assertEquals(8, results.get(1).value);
assertEquals(new BytesRef("foobar"), results.get(1).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("pen", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("p", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
suggester.close();
a.close();
}
/**
 * Looks up "pen p" against "the pen is pretty" and checks that the whole token "pen" and
 * the "p" prefix of "pretty" are each highlighted.
 */
public void testBothExactAndPrefix() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, false);

  // Start from an empty index, then add the single entry and make it visible.
  suggester.build(new InputArrayIterator(new Input[0]));
  BytesRef surface = new BytesRef("the pen is pretty");
  BytesRef payload = new BytesRef("foobaz");
  suggester.add(surface, null, 10, payload);
  suggester.refresh();

  CharSequence query = TestUtil.stringToCharSequence("pen p", random());
  List<LookupResult> hits = suggester.lookup(query, 10, true, true);

  assertEquals(1, hits.size());
  assertEquals("the pen is pretty", hits.get(0).key);
  assertEquals("the <b>pen</b> is <b>p</b>retty", hits.get(0).highlightKey);
  assertEquals(10, hits.get(0).value);
  assertEquals(new BytesRef("foobaz"), hits.get(0).payload);

  suggester.close();
  analyzer.close();
}
/**
 * Produces random text made of 1 to 4 words from {@code TestUtil.randomSimpleString}
 * (called with bounds 1 and 10), joined by single spaces.
 */
private static String randomText() {
  final int wordCount = TestUtil.nextInt(random(), 1, 4);
  StringBuilder text = new StringBuilder();
  int word = 0;
  while (word < wordCount) {
    if (word != 0) {
      text.append(' ');
    }
    text.append(TestUtil.randomSimpleString(random(), 1, 10));
    word++;
  }
  return text.toString();
}
/** A weight change recorded by testRandomNRT until it is applied to its model list. */
private static class Update {
// New weight for the entry.
long weight;
// Position of the affected entry in the test's inputs list.
int index;
}
/**
 * Background thread that keeps issuing random lookups against a suggester until
 * {@link #finish()} is called. Results are not checked.
 */
private static class LookupThread extends Thread {
  private final AnalyzingInfixSuggester suggester;
  private volatile boolean stop;

  public LookupThread(AnalyzingInfixSuggester suggester) {
    this.suggester = suggester;
  }

  /** Asks the loop to stop and waits for the thread to exit. */
  public void finish() throws InterruptedException {
    stop = true;
    join();
  }

  @Override
  public void run() {
    while (!stop) {
      final String queryText = randomText();
      final int limit = TestUtil.nextInt(random(), 1, 100);
      final boolean requireAll = random().nextBoolean();
      final boolean highlight = random().nextBoolean();

      // We don't verify the results; just doing
      // simultaneous lookups while adding/updating to
      // see if there are any thread hazards:
      try {
        CharSequence queryChars = TestUtil.stringToCharSequence(queryText, random());
        suggester.lookup(queryChars, limit, requireAll, highlight);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
    }
  }
}
/**
 * Randomized add/update/refresh/reopen test. Each iteration adds a new random suggestion
 * or updates the weight of an existing one, occasionally refreshes or closes and reopens
 * the suggester, and then compares a random lookup against a brute-force model kept in
 * {@code inputs}. A {@link LookupThread} issues unchecked lookups in the background the
 * whole time.
 *
 * <p>NOTE(review): {@code lookupThread.finish()} is only reached on the success path;
 * if an assertion fails the background thread is not stopped by this method.
 */
@Slow
public void testRandomNRT() throws Exception {
final Path tempDir = createTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
int minPrefixChars = random().nextInt(7);
if (VERBOSE) {
System.out.println(" minPrefixChars=" + minPrefixChars);
}
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, minPrefixChars, false);
// Initial suggester built with nothing:
suggester.build(new InputArrayIterator(new Input[0]));
LookupThread lookupThread = new LookupThread(suggester);
lookupThread.start();
int iters = atLeast(100);
// Number of leading entries of inputs that the model treats as visible to lookups;
// advanced only after a refresh or a close/reopen.
int visibleUpto = 0;
Set<Long> usedWeights = new HashSet<>();
Set<String> usedKeys = new HashSet<>();
// Model of what has been added so far.
List<Input> inputs = new ArrayList<>();
// Weight updates sent to the suggester but not yet applied to the model.
List<Update> pendingUpdates = new ArrayList<>();
for(int iter=0;iter<iters;iter++) {
String text;
// Pick a text that has not been used as a key before.
while (true) {
text = randomText();
if (usedKeys.contains(text) == false) {
usedKeys.add(text);
break;
}
}
// Carefully pick a weight we never used, to sidestep
// tie-break problems:
long weight;
while (true) {
weight = random().nextInt(10*iters);
if (usedWeights.contains(weight) == false) {
usedWeights.add(weight);
break;
}
}
if (inputs.size() > 0 && random().nextInt(4) == 1) {
// Update an existing suggestion
Update update = new Update();
update.index = random().nextInt(inputs.size());
update.weight = weight;
Input input = inputs.get(update.index);
pendingUpdates.add(update);
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " update input=" + input.term.utf8ToString() + "/" + weight);
}
suggester.update(input.term, null, weight, input.term);
} else {
// Add a new suggestion
inputs.add(new Input(text, weight, new BytesRef(text)));
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " add input=" + text + "/" + weight);
}
BytesRef br = new BytesRef(text);
suggester.add(br, null, weight, br);
}
if (random().nextInt(15) == 7) {
if (VERBOSE) {
System.out.println("TEST: now refresh suggester");
}
suggester.refresh();
// After the refresh the model treats every add and pending update as visible.
visibleUpto = inputs.size();
for(Update update : pendingUpdates) {
Input oldInput = inputs.get(update.index);
Input newInput = new Input(oldInput.term, update.weight, oldInput.payload);
inputs.set(update.index, newInput);
}
pendingUpdates.clear();
}
if (random().nextInt(50) == 7) {
if (VERBOSE) {
System.out.println("TEST: now close/reopen suggester");
}
// Stop the background thread before closing the suggester it is using.
lookupThread.finish();
suggester.close();
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, minPrefixChars, false);
lookupThread = new LookupThread(suggester);
lookupThread.start();
// Same model bookkeeping as after a refresh.
visibleUpto = inputs.size();
for(Update update : pendingUpdates) {
Input oldInput = inputs.get(update.index);
Input newInput = new Input(oldInput.term, update.weight, oldInput.payload);
inputs.set(update.index, newInput);
}
pendingUpdates.clear();
}
if (visibleUpto > 0) {
String query = randomText();
// When lastPrefix is false a trailing space is appended, so the final query
// term is matched exactly instead of as a prefix.
boolean lastPrefix = random().nextInt(5) != 1;
if (lastPrefix == false) {
query += " ";
}
String[] queryTerms = query.split("\\s");
boolean allTermsRequired = random().nextInt(10) == 7;
boolean doHilite = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: lookup \"" + query + "\" allTermsRequired=" + allTermsRequired + " doHilite=" + doHilite);
}
// Stupid slow but hopefully correct matching:
List<Input> expected = new ArrayList<>();
for(int i=0;i<visibleUpto;i++) {
Input input = inputs.get(i);
String[] inputTerms = input.term.utf8ToString().split("\\s");
boolean match = false;
for(int j=0;j<queryTerms.length;j++) {
if (j < queryTerms.length-1 || lastPrefix == false) {
// Exact match
for(int k=0;k<inputTerms.length;k++) {
if (inputTerms[k].equals(queryTerms[j])) {
match = true;
break;
}
}
} else {
// Prefix match
for(int k=0;k<inputTerms.length;k++) {
if (inputTerms[k].startsWith(queryTerms[j])) {
match = true;
break;
}
}
}
if (match) {
if (allTermsRequired == false) {
// At least one query term does match:
break;
}
// NOTE(review): with allTermsRequired == true this reset also runs after the
// LAST query term matches, so match is always false once the loop ends and
// no input is ever added to expected in that mode. Such lookups then fall
// into the "no expected matches" branch below and are not verified.
match = false;
} else if (allTermsRequired) {
// At least one query term does not match:
break;
}
}
if (match) {
if (doHilite) {
// The highlighted text goes in the term slot; the original term goes in the payload slot.
expected.add(new Input(hilite(lastPrefix, inputTerms, queryTerms), input.v, input.term));
} else {
expected.add(input);
}
}
}
// Sort by weight, highest first.
Collections.sort(expected,
(a1, b) -> {
if (a1.v > b.v) {
return -1;
} else if (a1.v < b.v) {
return 1;
} else {
return 0;
}
});
if (expected.isEmpty() == false) {
int topN = TestUtil.nextInt(random(), 1, expected.size());
List<LookupResult> actual = suggester.lookup(TestUtil.stringToCharSequence(query, random()), topN, allTermsRequired, doHilite);
int expectedCount = Math.min(topN, expected.size());
if (VERBOSE) {
System.out.println(" expected:");
for(int i=0;i<expectedCount;i++) {
Input x = expected.get(i);
System.out.println(" " + x.term.utf8ToString() + "/" + x.v);
}
System.out.println(" actual:");
for(LookupResult result : actual) {
System.out.println(" " + result);
}
}
assertEquals(expectedCount, actual.size());
for(int i=0;i<expectedCount;i++) {
if (doHilite) {
assertEquals(expected.get(i).term.utf8ToString(), actual.get(i).highlightKey);
} else {
assertEquals(expected.get(i).term.utf8ToString(), actual.get(i).key);
}
assertEquals(expected.get(i).v, actual.get(i).value);
assertEquals(expected.get(i).payload, actual.get(i).payload);
}
} else {
if (VERBOSE) {
System.out.println(" no expected matches");
}
}
}
}
lookupThread.finish();
suggester.close();
a.close();
}
/**
 * Reference highlighter used to compute the expected highlighted text: slow, but simple
 * enough to trust.
 *
 * <p>Input terms are re-joined with single spaces. Each input term is checked against
 * the query terms in order and the first hit wins: a term equal to a query term is
 * wrapped entirely in {@code <b>...</b>}; when {@code lastPrefix} is true the final query
 * term is instead treated as a prefix and only that prefix is wrapped.
 *
 * @param lastPrefix whether the final query term is a prefix rather than a whole term
 * @param inputTerms whitespace-split terms of the suggestion
 * @param queryTerms whitespace-split terms of the query
 * @return the suggestion text with matches wrapped in bold tags
 */
private static String hilite(boolean lastPrefix, String[] inputTerms, String[] queryTerms) {
  final int finalQueryIndex = queryTerms.length - 1;
  StringBuilder out = new StringBuilder();
  String separator = "";

  nextInputTerm:
  for (String term : inputTerms) {
    out.append(separator);
    separator = " ";

    for (int q = 0; q <= finalQueryIndex; q++) {
      String candidate = queryTerms[q];
      boolean asPrefix = lastPrefix && q == finalQueryIndex;
      if (asPrefix) {
        if (term.startsWith(candidate)) {
          // Bold only the matched prefix, then emit the remainder of the term as-is.
          out.append("<b>").append(candidate).append("</b>");
          out.append(term.substring(candidate.length()));
          continue nextInputTerm;
        }
      } else if (term.equals(candidate)) {
        out.append("<b>").append(term).append("</b>");
        continue nextInputTerm;
      }
    }

    // No query term hit this input term.
    out.append(term);
  }
  return out.toString();
}
/**
 * Builds a suggester with one entry, then adds a second entry and later updates the
 * first entry's weight and payload, calling {@code refresh} after each change and
 * checking that lookups reflect the new state and ordering.
 */
public void testBasicNRT() throws Exception {
Input keys[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
};
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
// Add a new suggestion:
suggester.add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));
// Must refresh to see any newly added suggestions:
suggester.refresh();
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey);
assertEquals(8, results.get(1).value);
assertEquals(new BytesRef("foobar"), results.get(1).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("pen", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("p", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
// Change the weight (8 -> 12) and the payload ("foobar" -> "foobox"):
suggester.update(new BytesRef("lend me your ear"), null, 12, new BytesRef("foobox"));
// Must refresh to see the updated suggestion:
suggester.refresh();
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
// The updated entry now has the higher weight and is expected first.
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(12, results.get(0).value);
assertEquals(new BytesRef("foobox"), results.get(0).payload);
assertEquals("a penny saved is a penny earned", results.get(1).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(1).highlightKey);
assertEquals(10, results.get(1).value);
assertEquals(new BytesRef("foobaz"), results.get(1).payload);
suggester.close();
a.close();
}
/**
 * Adds ten keys from ten threads (one {@link IndexDocument} each) to a suggester that
 * was never built, refreshes, and checks a lookup; then commits, closes, and checks the
 * same lookup on a new suggester opened on the same directory path.
 */
public void testNRTWithParallelAdds() throws IOException, InterruptedException {
String[] keys = new String[] {"python", "java", "c", "scala", "ruby", "clojure", "erlang", "go", "swift", "lisp"};
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
Path tempDir = createTempDir("AIS_NRT_PERSIST_TEST");
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
Thread[] multiAddThreads = new Thread[10];
// Cannot call refresh on a suggester when no docs are added to the index
expectThrows(IllegalStateException.class, () -> {
suggester.refresh();
});
// Create all threads first, then start them together.
for(int i=0; i<10; i++) {
multiAddThreads[i] = new Thread(new IndexDocument(suggester, keys[i]));
}
for(int i=0; i<10; i++) {
multiAddThreads[i].start();
}
//Make sure all threads have completed indexing
for(int i=0; i<10; i++) {
multiAddThreads[i].join();
}
suggester.refresh();
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("python", random()), 10, true, false);
assertEquals(1, results.size());
assertEquals("python", results.get(0).key);
//Test if the index is getting persisted correctly and can be reopened.
suggester.commit();
suggester.close();
AnalyzingInfixSuggester suggester2 = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
results = suggester2.lookup(TestUtil.stringToCharSequence("python", random()), 10, true, false);
assertEquals(1, results.size());
assertEquals("python", results.get(0).key);
suggester2.close();
a.close();
}
/**
 * Runnable that adds a single key (weight 10, no contexts, no payload) to the given suggester.
 * Used to drive concurrent adds from several threads.
 */
private static class IndexDocument implements Runnable {
  private final AnalyzingInfixSuggester suggester;
  private final String key;

  private IndexDocument(AnalyzingInfixSuggester suggester, String key) {
    this.suggester = suggester;
    this.key = key;
  }

  @Override
  public void run() {
    try {
      suggester.add(new BytesRef(key), null, 10, null);
    } catch (IOException e) {
      // Same failure type and message as before, but with the IOException chained as the cause
      // so the underlying I/O problem shows up in the stack trace.
      throw new AssertionError("Could not build suggest dictionary correctly", e);
    }
  }
}
/** Wraps each given string in a BytesRef and returns them as a set. */
private Set<BytesRef> asSet(String... values) {
  Set<BytesRef> refs = new HashSet<>();
  for (int i = 0; i < values.length; i++) {
    refs.add(new BytesRef(values[i]));
  }
  return refs;
}
/** Wraps each given byte array in a BytesRef and returns them as a set. */
private Set<BytesRef> asSet(byte[]... values) {
  Set<BytesRef> refs = new HashSet<>();
  for (int i = 0; i < values.length; i++) {
    refs.add(new BytesRef(values[i]));
  }
  return refs;
}
// LUCENE-5528 and LUCENE-6464
/**
 * Exercises context filtering on lookups for "ear" against two suggestions, one tagged with
 * contexts {foo, bar} and one with {foo, baz}. The checks run twice: once right after building
 * the index and once after reopening the same directory.
 */
public void testBasicContext() throws Exception {
  Input[] keys = new Input[] {
    new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet("foo", "baz"))
  };
  Path tempDir = createTempDir("analyzingInfixContext");
  for (int iter = 0; iter < 2; iter++) {
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
    if (iter == 0) {
      suggester.build(new InputArrayIterator(keys));
    }
    // Otherwise: test again, after close/reopen.

    // No context provided, all results returned
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertBothEarSuggestions(results);

    // Both have "foo" context:
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, true, true);
    assertBothEarSuggestions(results);

    // Only one has "bar" context:
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar"), 10, true, true);
    assertEquals(1, results.size());
    assertLendEarWithFooBar(results.get(0));

    // None do not have "foo" context:
    Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
    contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(0, results.size());

    // Only one does not have "bar" context:
    contextInfo.clear();
    contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(1, results.size());
    assertEarnedPennyWithFooBaz(results.get(0));

    // Both have "foo" or "bar" context:
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo", "bar"), 10, true, true);
    assertBothEarSuggestions(results);

    // Both have "bar" or "baz" context:
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
    assertBothEarSuggestions(results);

    // Only one has "foo" and "bar" context:
    contextInfo.clear();
    contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
    contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(1, results.size());
    assertLendEarWithFooBar(results.get(0));

    // None have "bar" and "baz" context:
    contextInfo.clear();
    contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
    contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(0, results.size());

    // None do not have "foo" and do not have "bar" context:
    contextInfo.clear();
    contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
    contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(0, results.size());

    // None lack both "bar" and "baz" context: each suggestion carries exactly one of them.
    // This lookup previously passed asSet("bar", "baz") and never used the MUST_NOT map built
    // here, which merely repeated the "bar or baz" case above.
    contextInfo.clear();
    contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
    contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST_NOT);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(0, results.size());

    // Only one has "foo" and does not have "bar" context:
    contextInfo.clear();
    contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
    contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
    assertEquals(1, results.size());
    assertEarnedPennyWithFooBaz(results.get(0));

    // LUCENE-6464 Using the advanced context filtering by query.
    // Note that this is just a sanity test as all the above tests run through the filter by query method
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    suggester.addContextToQuery(query, new BytesRef("foo"), BooleanClause.Occur.MUST);
    suggester.addContextToQuery(query, new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), query.build(), 10, true, true);
    assertEquals(1, results.size());

    suggester.close();
    a.close();
  }
}

/** Asserts that both "ear" suggestions were returned, the weight-10 one first. */
private void assertBothEarSuggestions(List<LookupResult> results) {
  assertEquals(2, results.size());
  assertEarnedPennyWithFooBaz(results.get(0));
  assertLendEarWithFooBar(results.get(1));
}

/** Asserts that the result is the "penny earned" suggestion with contexts {foo, baz}. */
private void assertEarnedPennyWithFooBaz(LookupResult result) {
  assertEquals("a penny saved is a penny earned", result.key);
  assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
  assertEquals(10, result.value);
  assertEquals(new BytesRef("foobaz"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("baz")));
}

/** Asserts that the result is the "lend me your ear" suggestion with contexts {foo, bar}. */
private void assertLendEarWithFooBar(LookupResult result) {
  assertEquals("lend me your ear", result.key);
  assertEquals("lend me your <b>ear</b>", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));
}
/**
 * Checks the highlighted output of addPrefixMatch for a partial-token prefix, a whole-token
 * prefix, and a prefix whose text differs from the surface form.
 */
@Test
public void testAddPrefixMatch() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  Directory directory = newDirectory();
  AnalyzingInfixSuggester infix = new AnalyzingInfixSuggester(directory, analyzer);

  String partialPrefix = pfmToString(infix, "Solr", "Sol");
  assertEquals("<b>Sol</b>r", partialPrefix);

  String wholeToken = pfmToString(infix, "Solr", "Solr");
  assertEquals("<b>Solr</b>", wholeToken);

  // Test SOLR-6085 - the analyzed tokens match due to ss->ß normalization
  String normalized = pfmToString(infix, "daß", "dass");
  assertEquals("<b>daß</b>", normalized);

  directory.close();
  infix.close();
  analyzer.close();
}
/**
 * Calls addPrefixMatch on the suggester with a fresh buffer, the given surface form, an empty
 * analyzed string and the given prefix, and returns whatever was written to the buffer.
 */
private String pfmToString(AnalyzingInfixSuggester suggester, String surface, String prefix) throws IOException {
  StringBuilder highlighted = new StringBuilder();
  suggester.addPrefixMatch(highlighted, surface, "", prefix);
  String rendered = highlighted.toString();
  return rendered;
}
/**
 * Verifies context filtering when the contexts are raw byte arrays rather than strings: two
 * zero-filled arrays of different length and a single 0xff byte. The checks run once after
 * building the index and once after reopening the same directory.
 */
public void testBinaryContext() throws Exception {
  byte[] fourZeros = new byte[4];
  byte[] fiveZeros = new byte[5];
  byte[] singleFf = new byte[1];
  singleFf[0] = (byte) 0xff;
  Input[] inputs = {
    new Input("lend me your ear", 8, new BytesRef("foobar"), asSet(fourZeros, fiveZeros)),
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet(fourZeros, singleFf))
  };
  Path indexPath = createTempDir("analyzingInfixContext");
  for (int pass = 0; pass < 2; pass++) {
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester infix = new AnalyzingInfixSuggester(newFSDirectory(indexPath), analyzer, analyzer, 3, false);
    if (pass == 0) {
      // First pass builds the index; the second pass only reopens it.
      infix.build(new InputArrayIterator(inputs));
    }

    // Both have the four-zero-byte context:
    List<LookupResult> hits = infix.lookup(TestUtil.stringToCharSequence("ear", random()), asSet(fourZeros), 10, true, true);
    assertEquals(2, hits.size());

    LookupResult penny = hits.get(0);
    assertEquals("a penny saved is a penny earned", penny.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", penny.highlightKey);
    assertEquals(10, penny.value);
    assertEquals(new BytesRef("foobaz"), penny.payload);
    assertNotNull(penny.contexts);
    assertEquals(2, penny.contexts.size());
    assertTrue(penny.contexts.contains(new BytesRef(fourZeros)));
    assertTrue(penny.contexts.contains(new BytesRef(singleFf)));

    LookupResult lend = hits.get(1);
    assertEquals("lend me your ear", lend.key);
    assertEquals("lend me your <b>ear</b>", lend.highlightKey);
    assertEquals(8, lend.value);
    assertEquals(new BytesRef("foobar"), lend.payload);
    assertNotNull(lend.contexts);
    assertEquals(2, lend.contexts.size());
    assertTrue(lend.contexts.contains(new BytesRef(fourZeros)));
    assertTrue(lend.contexts.contains(new BytesRef(fiveZeros)));

    infix.close();
    analyzer.close();
  }
}
/**
 * Runs context-filtered lookups while passing {@code false} for the lookup flag that the other
 * context tests pass as {@code true} (presumably allTermsRequired, per the test name), and
 * checks that context filtering still restricts the results.
 */
public void testContextNotAllTermsRequired() throws Exception {
  Input keys[] = new Input[] {
    new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet("foo", "baz"))
  };
  Path tempDir = createTempDir("analyzingInfixContext");
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
  suggester.build(new InputArrayIterator(keys));

  // No context provided, all results returned
  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, false, true);
  assertEquals(2, results.size());
  LookupResult result = results.get(0);
  assertEquals("a penny saved is a penny earned", result.key);
  assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
  assertEquals(10, result.value);
  assertEquals(new BytesRef("foobaz"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("baz")));
  result = results.get(1);
  assertEquals("lend me your ear", result.key);
  assertEquals("lend me your <b>ear</b>", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));

  // Both have "foo" context:
  results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, false, true);
  assertEquals(2, results.size());
  result = results.get(0);
  assertEquals("a penny saved is a penny earned", result.key);
  assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
  assertEquals(10, result.value);
  assertEquals(new BytesRef("foobaz"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("baz")));
  result = results.get(1);
  assertEquals("lend me your ear", result.key);
  assertEquals("lend me your <b>ear</b>", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));

  // Only one has "foo" context and len
  results = suggester.lookup(TestUtil.stringToCharSequence("len", random()), asSet("foo"), 10, false, true);
  assertEquals(1, results.size());
  result = results.get(0);
  assertEquals("lend me your ear", result.key);
  assertEquals("<b>len</b>d me your ear", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));

  suggester.close();
  // The analyzer was previously left open here; close it like the sibling tests do.
  a.close();
}
/**
 * Checks the state of the suggester's writer and searcher manager when the last constructor flag
 * (closeIndexWriterOnBuild) is true, both right after build() and after reopening an already
 * built suggester directory.
 */
public void testCloseIndexWriterOnBuild() throws Exception {
  // Local subclass whose only job is to expose the writer and searcherMgr references.
  class ExposedSuggester extends AnalyzingInfixSuggester {
    ExposedSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                     int minPrefixChars, boolean commitOnBuild, boolean allTermsRequired,
                     boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
      super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
            allTermsRequired, highlight, closeIndexWriterOnBuild);
    }

    IndexWriter currentWriter() {
      return writer;
    }

    SearcherManager currentSearcherManager() {
      return searcherMgr;
    }
  }

  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  Path indexPath = createTempDir("analyzingInfixContext");

  // After build(), when closeIndexWriterOnBuild = true:
  //  * The IndexWriter should be null
  //  * The SearcherManager should be non-null
  //  * SearcherManager's IndexWriter reference should be closed
  //    (as evidenced by maybeRefreshBlocking() throwing AlreadyClosedException)
  final ExposedSuggester built = new ExposedSuggester(
      newFSDirectory(indexPath), analyzer, analyzer, 3, false,
      AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
  built.build(new InputArrayIterator(sharedInputs));
  assertNull(built.currentWriter());
  assertNotNull(built.currentSearcherManager());
  expectThrows(AlreadyClosedException.class, () -> {
    built.currentSearcherManager().maybeRefreshBlocking();
  });
  built.close();

  // After instantiating from an already-built suggester dir:
  //  * The IndexWriter should be null
  //  * The SearcherManager should be non-null
  final ExposedSuggester reopened = new ExposedSuggester(
      newFSDirectory(indexPath), analyzer, analyzer, 3, false,
      AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
  assertNull(reopened.currentWriter());
  assertNotNull(reopened.currentSearcherManager());
  reopened.close();

  analyzer.close();
}
/** Checks that build() followed by commit() does not throw for any option combination. */
public void testCommitAfterBuild() throws Exception {
  SuggesterOperation buildThenCommit = s -> {
    s.build(new InputArrayIterator(sharedInputs));
    s.commit();
  };
  performOperationWithAllOptionCombinations(buildThenCommit);
}
/** Checks that build() followed by refresh() does not throw for any option combination. */
public void testRefreshAfterBuild() throws Exception {
  SuggesterOperation buildThenRefresh = s -> {
    s.build(new InputArrayIterator(sharedInputs));
    s.refresh();
  };
  performOperationWithAllOptionCombinations(buildThenRefresh);
}
/** Checks that commit() on a never-built suggester throws IllegalStateException for every option combination. */
public void testDisallowCommitBeforeBuild() throws Exception {
  performOperationWithAllOptionCombinations(s -> {
    expectThrows(IllegalStateException.class, () -> s.commit());
  });
}
/** Checks that refresh() on a never-built suggester throws IllegalStateException for every option combination. */
public void testDisallowRefreshBeforeBuild() throws Exception {
  performOperationWithAllOptionCombinations(s -> {
    expectThrows(IllegalStateException.class, () -> s.refresh());
  });
}
private Input sharedInputs[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
/**
 * An action to run against a suggester; may throw any exception. Implemented as lambdas by the
 * tests, so it is marked as a functional interface to keep it single-method.
 */
@FunctionalInterface
private interface SuggesterOperation {
  /**
   * Runs the action.
   *
   * @param suggester the suggester to operate on
   * @throws Exception if the action fails
   */
  void operate(AnalyzingInfixSuggester suggester) throws Exception;
}
/**
 * Runs the given operation once per suggester variant, each on its own new directory: the
 * two-argument constructor (defaults), the five-argument constructor with its boolean flag set
 * to false and then true, and the eight-argument constructor with every combination of
 * commitOnBuild and closeIndexWriterOnBuild. Each suggester is closed after its operation, and
 * the shared analyzer is closed at the end.
 */
private void performOperationWithAllOptionCombinations(SuggesterOperation operation) throws Exception {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

  // Defaults for everything.
  AnalyzingInfixSuggester current = new AnalyzingInfixSuggester(newDirectory(), analyzer);
  operation.operate(current);
  current.close();

  // Five-argument constructor; the trailing flag is presumably commitOnBuild.
  boolean[] falseThenTrue = {false, true};
  for (boolean flag : falseThenTrue) {
    current = new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, flag);
    operation.operate(current);
    current.close();
  }

  // Eight-argument constructor: (commitOnBuild, closeIndexWriterOnBuild) in the order
  // (true, true), (true, false), (false, true), (false, false).
  boolean[] trueThenFalse = {true, false};
  for (boolean commitOnBuild : trueThenFalse) {
    for (boolean closeIndexWriterOnBuild : trueThenFalse) {
      current = new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, commitOnBuild,
          AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT,
          closeIndexWriterOnBuild);
      operation.operate(current);
      current.close();
    }
  }

  analyzer.close();
}
}