blob: 485d4e76d66c9069bbfec96741a12d412a984006 [file] [log] [blame]
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search.Suggest.Fst
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class WFSTCompletionTest : LuceneTestCase
{
[Test]
public void TestBasic()
{
Input[] keys = new Input[] {
new Input("foo", 50),
new Input("bar", 10),
new Input("barbar", 12),
new Input("barbara", 6)
};
Random random = new Random(Random.Next());
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
suggester.Build(new InputArrayEnumerator(keys));
// top N of 2, but only foo is available
IList<Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence("f", random).ToString(), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results[0].Key.toString());
assertEquals(50, results[0].Value, 0.01F);
// make sure we don't get a dup exact suggestion:
results = suggester.DoLookup(TestUtil.StringToCharSequence("foo", random).ToString(), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results[0].Key.toString());
assertEquals(50, results[0].Value, 0.01F);
// top N of 1 for 'bar': we return this even though barbar is higher
results = suggester.DoLookup(TestUtil.StringToCharSequence("bar", random).ToString(), false, 1);
assertEquals(1, results.size());
assertEquals("bar", results[0].Key.toString());
assertEquals(10, results[0].Value, 0.01F);
// top N Of 2 for 'b'
results = suggester.DoLookup(TestUtil.StringToCharSequence("b", random).ToString(), false, 2);
assertEquals(2, results.size());
assertEquals("barbar", results[0].Key.toString());
assertEquals(12, results[0].Value, 0.01F);
assertEquals("bar", results[1].Key.toString());
assertEquals(10, results[1].Value, 0.01F);
// top N of 3 for 'ba'
results = suggester.DoLookup(TestUtil.StringToCharSequence("ba", random).ToString(), false, 3);
assertEquals(3, results.size());
assertEquals("barbar", results[0].Key.toString());
assertEquals(12, results[0].Value, 0.01F);
assertEquals("bar", results[1].Key.toString());
assertEquals(10, results[1].Value, 0.01F);
assertEquals("barbara", results[2].Key.toString());
assertEquals(6, results[2].Value, 0.01F);
}
[Test]
public void TestExactFirst()
{
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
suggester.Build(new InputArrayEnumerator(new Input[] {
new Input("x y", 20),
new Input("x", 2),
}));
for (int topN = 1; topN < 4; topN++)
{
IList<Lookup.LookupResult> results = suggester.DoLookup("x", false, topN);
assertEquals(Math.Min(topN, 2), results.size());
assertEquals("x", results[0].Key);
assertEquals(2, results[0].Value);
if (topN > 1)
{
assertEquals("x y", results[1].Key);
assertEquals(20, results[1].Value);
}
}
}
[Test]
public void TestNonExactFirst()
{
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.Build(new InputArrayEnumerator(new Input[] {
new Input("x y", 20),
new Input("x", 2),
}));
for (int topN = 1; topN < 4; topN++)
{
IList<Lookup.LookupResult> results = suggester.DoLookup("x", false, topN);
assertEquals(Math.Min(topN, 2), results.size());
assertEquals("x y", results[0].Key);
assertEquals(20, results[0].Value);
if (topN > 1)
{
assertEquals("x", results[1].Key);
assertEquals(2, results[1].Value);
}
}
}
[Test]
public void TestRandom()
{
int numWords = AtLeast(1000);
IDictionary<string, long> slowCompletor = new JCG.SortedDictionary<string, long>(StringComparer.Ordinal);
ISet<string> allPrefixes = new JCG.SortedSet<string>(StringComparer.Ordinal);
Input[] keys = new Input[numWords];
for (int i = 0; i < numWords; i++)
{
String s;
while (true)
{
// TODO: would be nice to fix this slowCompletor/comparer to
// use full range, but we might lose some coverage too...
s = TestUtil.RandomSimpleString(LuceneTestCase.Random);
if (!slowCompletor.ContainsKey(s))
{
break;
}
}
for (int j = 1; j < s.Length; j++)
{
allPrefixes.add(s.Substring(0, j));
}
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = LuceneTestCase.Random.nextInt(1 << 24);
slowCompletor.Put(s, (long)weight);
keys[i] = new Input(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.Build(new InputArrayEnumerator(keys));
assertEquals(numWords, suggester.Count);
Random random = new Random(Random.Next());
foreach (String prefix in allPrefixes)
{
int topN = TestUtil.NextInt32(random, 1, 10);
IList<Lookup.LookupResult> r = suggester.DoLookup(TestUtil.StringToCharSequence(prefix, random).ToString(), false, topN);
// 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
List<Lookup.LookupResult> matches = new List<Lookup.LookupResult>();
// TODO: could be faster... but its slowCompletor for a reason
foreach (KeyValuePair<string, long> e in slowCompletor)
{
if (e.Key.StartsWith(prefix, StringComparison.Ordinal))
{
matches.Add(new Lookup.LookupResult(e.Key, e.Value));
}
}
assertTrue(matches.size() > 0);
matches.Sort(new TestRandomComparer());
if (matches.size() > topN)
{
//matches.SubList(topN, matches.size()).clear();
matches.RemoveRange(topN, matches.size() - topN);
}
assertEquals(matches.size(), r.size());
for (int hit = 0; hit < r.size(); hit++)
{
//System.out.println(" check hit " + hit);
assertEquals(matches[hit].Key.toString(), r[hit].Key.toString());
assertEquals(matches[hit].Value, r[hit].Value, 0f);
}
}
}
internal class TestRandomComparer : IComparer<Lookup.LookupResult>
{
public int Compare(Lookup.LookupResult left, Lookup.LookupResult right)
{
int cmp = ((float)right.Value).CompareTo((float)left.Value);
if (cmp == 0)
{
return left.CompareTo(right);
}
else
{
return cmp;
}
}
}
[Test]
public void Test0ByteKeys()
{
BytesRef key1 = new BytesRef(4);
key1.Length = 4;
BytesRef key2 = new BytesRef(3);
key1.Length = 3;
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.Build(new InputArrayEnumerator(new Input[] {
new Input(key1, 50),
new Input(key2, 50),
}));
}
[Test]
public void TestEmpty()
{
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
suggester.Build(new InputArrayEnumerator(new Input[0]));
assertEquals(0, suggester.Count);
IList<Lookup.LookupResult> result = suggester.DoLookup("a", false, 20);
assertTrue(result.Count == 0);
}
}
}