blob: f225ef99c38912c3865cc349d6691e237a7fa708 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest.fst;
import java.util.*;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class WFSTCompletionTest extends LuceneTestCase {
public void testBasic() throws Exception {
Input keys[] = new Input[] {
new Input("foo", 50),
new Input("bar", 10),
new Input("barbar", 12),
new Input("barbara", 6)
};
Random random = new Random(random().nextLong());
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst");
suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("f", random), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
// make sure we don't get a dup exact suggestion:
results = suggester.lookup(TestUtil.stringToCharSequence("foo", random), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
// top N of 1 for 'bar': we return this even though barbar is higher
results = suggester.lookup(TestUtil.stringToCharSequence("bar", random), false, 1);
assertEquals(1, results.size());
assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F);
// top N Of 2 for 'b'
results = suggester.lookup(TestUtil.stringToCharSequence("b", random), false, 2);
assertEquals(2, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
// top N of 3 for 'ba'
results = suggester.lookup(TestUtil.stringToCharSequence("ba", random), false, 3);
assertEquals(3, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
tempDir.close();
}
public void testExactFirst() throws Exception {
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 20),
new Input("x", 2),
}));
for(int topN=1;topN<4;topN++) {
List<LookupResult> results = suggester.lookup("x", false, topN);
assertEquals(Math.min(topN, 2), results.size());
assertEquals("x", results.get(0).key);
assertEquals(2, results.get(0).value);
if (topN > 1) {
assertEquals("x y", results.get(1).key);
assertEquals(20, results.get(1).value);
}
}
tempDir.close();
}
public void testNonExactFirst() throws Exception {
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 20),
new Input("x", 2),
}));
for(int topN=1;topN<4;topN++) {
List<LookupResult> results = suggester.lookup("x", false, topN);
assertEquals(Math.min(topN, 2), results.size());
assertEquals("x y", results.get(0).key);
assertEquals(20, results.get(0).value);
if (topN > 1) {
assertEquals("x", results.get(1).key);
assertEquals(2, results.get(1).value);
}
}
tempDir.close();
}
public void testRandom() throws Exception {
int numWords = atLeast(1000);
final TreeMap<String,Long> slowCompletor = new TreeMap<>();
final TreeSet<String> allPrefixes = new TreeSet<>();
Input[] keys = new Input[numWords];
for (int i = 0; i < numWords; i++) {
String s;
while (true) {
// TODO: would be nice to fix this slowCompletor/comparator to
// use full range, but we might lose some coverage too...
s = TestUtil.randomSimpleString(random());
if (!slowCompletor.containsKey(s)) {
break;
}
}
for (int j = 1; j < s.length(); j++) {
allPrefixes.add(s.substring(0, j));
}
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
slowCompletor.put(s, (long)weight);
keys[i] = new Input(s, weight);
}
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(keys));
assertEquals(numWords, suggester.getCount());
Random random = new Random(random().nextLong());
for (String prefix : allPrefixes) {
final int topN = TestUtil.nextInt(random, 1, 10);
List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence(prefix, random), false, topN);
// 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion
final List<LookupResult> matches = new ArrayList<>();
// TODO: could be faster... but it's slowCompletor for a reason
for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
if (e.getKey().startsWith(prefix)) {
matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
}
}
assertTrue(matches.size() > 0);
Collections.sort(matches, new Comparator<LookupResult>() {
@Override
public int compare(LookupResult left, LookupResult right) {
int cmp = Float.compare(right.value, left.value);
if (cmp == 0) {
return left.compareTo(right);
} else {
return cmp;
}
}
});
if (matches.size() > topN) {
matches.subList(topN, matches.size()).clear();
}
assertEquals(matches.size(), r.size());
for(int hit=0;hit<r.size();hit++) {
//System.out.println(" check hit " + hit);
assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
}
}
tempDir.close();
}
public void test0ByteKeys() throws Exception {
BytesRef key1 = new BytesRef(4);
key1.length = 4;
BytesRef key2 = new BytesRef(3);
key1.length = 3;
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(new Input[] {
new Input(key1, 50),
new Input(key2, 50),
}));
tempDir.close();
}
public void testEmpty() throws Exception {
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(new Input[0]));
assertEquals(0, suggester.getCount());
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
tempDir.close();
}
private Directory getDirectory() {
return newDirectory();
}
}