| using J2N.Text; |
| using Lucene.Net.Analysis; |
| using Lucene.Net.Analysis.TokenAttributes; |
| using Lucene.Net.Documents; |
| using Lucene.Net.Util; |
| using NUnit.Framework; |
| using System; |
| using System.Collections.Generic; |
| using System.Diagnostics; |
| using System.IO; |
| using System.Linq; |
| using System.Text; |
| using System.Text.RegularExpressions; |
| using JCG = J2N.Collections.Generic; |
| using Console = Lucene.Net.Support.SystemConsole; |
| using J2N.Collections.Generic.Extensions; |
| |
| namespace Lucene.Net.Search.Suggest.Analyzing |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| public class AnalyzingSuggesterTest : LuceneTestCase |
| { |
/// <summary>
/// This is basically the WFST test ported to a keyword analyzer, so it is
/// expected to behave the same way.
/// </summary>
[Test]
public void TestKeyword()
{
    IEnumerable<Input> shuffledInputs = Shuffle(
        new Input("foo", 50),
        new Input("bar", 10),
        new Input("barbar", 10),
        new Input("barbar", 12),
        new Input("barbara", 6),
        new Input("bar", 5),
        new Input("barbara", 1)
    );

    Analyzer keywordAnalyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
    AnalyzingSuggester suggester = new AnalyzingSuggester(keywordAnalyzer);
    suggester.Build(new InputArrayIterator(shuffledInputs));

    // Runs one lookup; the prefix is round-tripped through TestUtil.StringToCharSequence first.
    IList<Lookup.LookupResult> Suggest(string prefix, int topN)
    {
        string query = TestUtil.StringToCharSequence(prefix, Random).ToString();
        return suggester.DoLookup(query, false, topN);
    }

    // Verifies the surface form and the weight of a single suggestion.
    void AssertHit(Lookup.LookupResult hit, string expectedKey, int expectedWeight)
    {
        assertEquals(expectedKey, hit.Key);
        assertEquals(expectedWeight, hit.Value, 0.01F);
    }

    // top N of 2, but only foo is available
    IList<Lookup.LookupResult> hits = Suggest("f", 2);
    assertEquals(1, hits.Count);
    AssertHit(hits[0], "foo", 50);

    // top N of 1 for 'bar': we return this even though
    // barbar is higher because exactFirst is enabled:
    hits = Suggest("bar", 1);
    assertEquals(1, hits.Count);
    AssertHit(hits[0], "bar", 10);

    // top N of 2 for 'b'
    hits = Suggest("b", 2);
    assertEquals(2, hits.Count);
    AssertHit(hits[0], "barbar", 12);
    AssertHit(hits[1], "bar", 10);

    // top N of 3 for 'ba'
    hits = Suggest("ba", 3);
    assertEquals(3, hits.Count);
    AssertHit(hits[0], "barbar", 12);
    AssertHit(hits[1], "bar", 10);
    AssertHit(hits[2], "barbara", 6);
}
| |
/// <summary>
/// Same scenario as the keyword test, but every input carries a payload. The
/// expectations below require that, for the repeated key "bar", only the entry
/// with weight 10 and payload "goodbye" is returned.
/// </summary>
[Test]
public void TestKeywordWithPayloads()
{
    IEnumerable<Input> shuffledInputs = Shuffle(
        new Input("foo", 50, new BytesRef("hello")),
        new Input("bar", 10, new BytesRef("goodbye")),
        new Input("barbar", 12, new BytesRef("thank you")),
        new Input("bar", 9, new BytesRef("should be deduplicated")),
        new Input("bar", 8, new BytesRef("should also be deduplicated")),
        new Input("barbara", 6, new BytesRef("for all the fish")));

    Analyzer keywordAnalyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
    AnalyzingSuggester suggester = new AnalyzingSuggester(keywordAnalyzer);
    suggester.Build(new InputArrayIterator(shuffledInputs));

    // Runs one lookup; the prefix is round-tripped through TestUtil.StringToCharSequence first.
    IList<Lookup.LookupResult> Suggest(string prefix, int topN)
    {
        string query = TestUtil.StringToCharSequence(prefix, Random).ToString();
        return suggester.DoLookup(query, false, topN);
    }

    // Verifies surface form, weight and payload of a single suggestion.
    void AssertHit(Lookup.LookupResult hit, string expectedKey, int expectedWeight, string expectedPayload)
    {
        assertEquals(expectedKey, hit.Key);
        assertEquals(expectedWeight, hit.Value, 0.01F);
        assertEquals(new BytesRef(expectedPayload), hit.Payload);
    }

    // The whole set of lookups is executed twice against the same suggester.
    for (int pass = 0; pass < 2; pass++)
    {
        // top N of 2, but only foo is available
        IList<Lookup.LookupResult> hits = Suggest("f", 2);
        assertEquals(1, hits.Count);
        AssertHit(hits[0], "foo", 50, "hello");

        // top N of 1 for 'bar': we return this even though
        // barbar is higher because exactFirst is enabled:
        hits = Suggest("bar", 1);
        assertEquals(1, hits.Count);
        AssertHit(hits[0], "bar", 10, "goodbye");

        // top N of 2 for 'b'
        hits = Suggest("b", 2);
        assertEquals(2, hits.Count);
        AssertHit(hits[0], "barbar", 12, "thank you");
        AssertHit(hits[1], "bar", 10, "goodbye");

        // top N of 3 for 'ba'
        hits = Suggest("ba", 3);
        assertEquals(3, hits.Count);
        AssertHit(hits[0], "barbar", 12, "thank you");
        AssertHit(hits[1], "bar", 10, "goodbye");
        AssertHit(hits[2], "barbara", 6, "for all the fish");
    }
}
| |
/// <summary>
/// Feeds randomly weighted document titles from <c>LineFileDocs</c> into an
/// <c>AnalyzingSuggester</c> and checks that every suggestion reports the highest
/// weight recorded for its title (and, when payloads are enabled, a payload equal
/// to the weight rendered as a string).
/// </summary>
[Test]
public void TestRandomRealisticKeys()
{
    LineFileDocs lineFile = new LineFileDocs(Random);
    try
    {
        // Highest weight seen so far for each title; titles may repeat.
        IDictionary<string, long> mapping = new JCG.Dictionary<string, long>();
        List<Input> keys = new List<Input>();

        int howMany = AtLeast(100); // this might bring up duplicates
        for (int i = 0; i < howMany; i++)
        {
            Document nextDoc = lineFile.NextDoc();
            string title = nextDoc.GetField("title").GetStringValue();
            int randomWeight = Random.nextInt(100);
            keys.Add(new Input(title, randomWeight));
            if (!mapping.TryGetValue(title, out long titleValue) || titleValue < randomWeight)
            {
                mapping[title] = randomWeight;
            }
        }

        AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(new MockAnalyzer(Random), new MockAnalyzer(Random),
            SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP, 256, -1, Random.nextBoolean());
        bool doPayloads = Random.nextBoolean();
        if (doPayloads)
        {
            // The payload is the weight rendered as text, so it can be cross-checked below.
            List<Input> keysAndPayloads = new List<Input>();
            foreach (Input termFreq in keys)
            {
                keysAndPayloads.Add(new Input(termFreq.term, termFreq.v, new BytesRef(termFreq.v.ToString())));
            }
            analyzingSuggester.Build(new InputArrayIterator(keysAndPayloads));
        }
        else
        {
            analyzingSuggester.Build(new InputArrayIterator(keys));
        }

        foreach (Input termFreq in keys)
        {
            IList<Lookup.LookupResult> lookup = analyzingSuggester.DoLookup(termFreq.term.Utf8ToString(), false, keys.Count);
            foreach (Lookup.LookupResult lookupResult in lookup)
            {
                assertEquals(mapping[lookupResult.Key], lookupResult.Value);
                if (doPayloads)
                {
                    assertEquals(lookupResult.Payload.Utf8ToString(), lookupResult.Value.ToString());
                }
                else
                {
                    assertNull(lookupResult.Payload);
                }
            }
        }
    }
    finally
    {
        // Dispose even when an assertion above fails, so the line file is never leaked.
        lineFile.Dispose();
    }
}
| |
// TODO: more tests
/// <summary>
/// Basic "standardanalyzer" test with stopword removal.
/// </summary>
[Test]
public void TestStandard()
{
    const string Surface = "the ghost of christmas past";
    Input[] keys = { new Input(Surface, 50) };

    Analyzer standard = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    AnalyzingSuggester suggester = new AnalyzingSuggester(standard, standard,
        SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP, 256, -1, false);

    suggester.Build(new InputArrayIterator(keys));

    // Looks up the top suggestion for the prefix and expects the single indexed phrase.
    void AssertSuggestsPhrase(string prefix)
    {
        string query = TestUtil.StringToCharSequence(prefix, Random).ToString();
        IList<Lookup.LookupResult> hits = suggester.DoLookup(query, false, 1);
        assertEquals(1, hits.Count);
        assertEquals(Surface, hits[0].Key);
        assertEquals(50, hits[0].Value, 0.01F);
    }

    AssertSuggestsPhrase("the ghost of chris");

    // omit the 'the' since it's a stopword; it's suggested anyway
    AssertSuggestsPhrase("ghost of chris");

    // omit the 'the' and 'of' since they are stopwords; it's suggested anyway
    AssertSuggestsPhrase("ghost chris");
}
| |
/// <summary>
/// A suggester built from zero inputs must return no suggestions.
/// </summary>
[Test]
public void TestEmpty()
{
    Analyzer standard = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    AnalyzingSuggester suggester = new AnalyzingSuggester(standard);

    Input[] noInputs = new Input[0];
    suggester.Build(new InputArrayIterator(noInputs));

    IList<Lookup.LookupResult> hits = suggester.DoLookup("a", false, 20);
    assertTrue(hits.Count == 0);
}
| |
/// <summary>
/// With no suggester options set (in particular no PRESERVE_SEP), the query
/// "ab c" is expected to match both "ab cd" and "abcd".
/// </summary>
[Test]
public void TestNoSeps()
{
    Input[] keys =
    {
        new Input("ab cd", 0),
        new Input("abcd", 1),
    };

    SuggesterOptions noOptions = 0;

    Analyzer analyzer = new MockAnalyzer(Random);
    AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, noOptions, 256, -1, true);
    suggester.Build(new InputArrayIterator(keys));

    // TODO: would be nice if "ab " would allow the test to
    // pass, and more generally if the analyzer can know
    // that the user's current query has ended at a word,
    // but, analyzers don't produce SEP tokens!
    string query = TestUtil.StringToCharSequence("ab c", Random).ToString();
    IList<Lookup.LookupResult> hits = suggester.DoLookup(query, false, 2);
    assertEquals(2, hits.Count);

    // With no PRESERVE_SEPS specified, "ab c" should also
    // complete to "abcd", which has higher weight so should
    // appear first:
    assertEquals("abcd", hits[0].Key);
}
| |
/// <summary>
/// Token stream components that ignore the supplied reader and hand out a fixed
/// sequence of canned token graphs, one per access of <see cref="TokenStream"/>.
/// </summary>
internal class TestGraphDupsTokenStreamComponents : TokenStreamComponents
{
    private readonly AnalyzingSuggesterTest outerInstance;

    // Index of the canned stream that the next TokenStream access returns.
    internal int tokenStreamCounter = 0;

    internal readonly TokenStream[] tokenStreams = new TokenStream[]
    {
        new CannedTokenStream(new Token[]
        {
            NewToken("wifi", 1, 1),
            NewToken("hotspot", 0, 2),
            NewToken("network", 1, 1),
            NewToken("is", 1, 1),
            NewToken("slow", 1, 1)
        }),
        new CannedTokenStream(new Token[]
        {
            NewToken("wi", 1, 1),
            NewToken("hotspot", 0, 3),
            NewToken("fi", 1, 1),
            NewToken("network", 1, 1),
            NewToken("is", 1, 1),
            NewToken("fast", 1, 1)
        }),
        new CannedTokenStream(new Token[]
        {
            NewToken("wifi", 1, 1),
            NewToken("hotspot", 0, 2),
            NewToken("network", 1, 1)
        }),
    };

    public TestGraphDupsTokenStreamComponents(AnalyzingSuggesterTest outerInstance, Tokenizer tokenizer)
        : base(tokenizer)
    {
        this.outerInstance = outerInstance;
    }

    /// <summary>
    /// Returns the next canned stream and advances the counter; accessing this more
    /// often than there are canned streams indexes past the end of the array.
    /// </summary>
    public override TokenStream TokenStream
    {
        get { return tokenStreams[tokenStreamCounter++]; }
    }

    protected override void SetReader(TextReader reader)
    {
        // Intentionally empty: the canned streams are built without a reader.
    }
}
| |
/// <summary>
/// Analyzer whose components are <see cref="TestGraphDupsTokenStreamComponents"/>,
/// i.e. it produces canned token graphs.
/// </summary>
internal class TestGraphDupsAnalyzer : Analyzer
{
    private readonly AnalyzingSuggesterTest outerInstance;

    public TestGraphDupsAnalyzer(AnalyzingSuggesterTest outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        // A tokenizer is still created because the base components constructor takes one.
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStreamComponents components = new TestGraphDupsTokenStreamComponents(outerInstance, source);
        return components;
    }
}
| |
| |
| |
/// <summary>
/// Uses an analyzer that emits canned token graphs and expects the query
/// "wifi network" to return both inputs, highest weight first.
/// </summary>
[Test]
public void TestGraphDups()
{
    Analyzer cannedAnalyzer = new TestGraphDupsAnalyzer(this);

    Input[] keys =
    {
        new Input("wifi network is slow", 50),
        new Input("wi fi network is fast", 10),
    };
    //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
    AnalyzingSuggester suggester = new AnalyzingSuggester(cannedAnalyzer);
    suggester.Build(new InputArrayIterator(keys));

    IList<Lookup.LookupResult> hits = suggester.DoLookup("wifi network", false, 10);
    if (VERBOSE)
    {
        Console.WriteLine("Results: " + hits);
    }

    assertEquals(2, hits.Count);

    Lookup.LookupResult first = hits[0];
    assertEquals("wifi network is slow", first.Key);
    assertEquals(50, first.Value);

    Lookup.LookupResult second = hits[1];
    assertEquals("wi fi network is fast", second.Key);
    assertEquals(10, second.Value);
}
| |
/// <summary>
/// Token stream components that ignore the supplied reader and hand out a fixed
/// sequence of canned token streams, one per access of <see cref="TokenStream"/>.
/// </summary>
internal class TestInputPathRequiredTokenStreamComponents : TokenStreamComponents
{
    private readonly AnalyzingSuggesterTest outerInstance;

    // Index of the canned stream that the next TokenStream access returns.
    internal int tokenStreamCounter = 0;

    internal TokenStream[] tokenStreams = new TokenStream[]
    {
        new CannedTokenStream(new Token[]
        {
            NewToken("ab", 1, 1),
            NewToken("ba", 0, 1),
            NewToken("xc", 1, 1)
        }),
        new CannedTokenStream(new Token[]
        {
            NewToken("ba", 1, 1),
            NewToken("xd", 1, 1)
        }),
        new CannedTokenStream(new Token[]
        {
            NewToken("ab", 1, 1),
            NewToken("ba", 0, 1),
            NewToken("x", 1, 1)
        })
    };

    public TestInputPathRequiredTokenStreamComponents(AnalyzingSuggesterTest outerInstance, Tokenizer tokenizer)
        : base(tokenizer)
    {
        this.outerInstance = outerInstance;
    }

    /// <summary>
    /// Returns the next canned stream and advances the counter; accessing this more
    /// often than there are canned streams indexes past the end of the array.
    /// </summary>
    public override TokenStream TokenStream
    {
        get { return tokenStreams[tokenStreamCounter++]; }
    }

    protected override void SetReader(TextReader reader)
    {
        // Intentionally empty: the canned streams are built without a reader.
    }
}
| |
/// <summary>
/// Analyzer whose components are
/// <see cref="TestInputPathRequiredTokenStreamComponents"/>, i.e. it produces
/// canned token streams.
/// </summary>
internal class TestInputPathRequiredAnalyzer : Analyzer
{
    private readonly AnalyzingSuggesterTest outerInstance;

    public TestInputPathRequiredAnalyzer(AnalyzingSuggesterTest outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        // A tokenizer is still created because the base components constructor takes one.
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStreamComponents components = new TestInputPathRequiredTokenStreamComponents(outerInstance, source);
        return components;
    }
}
| |
/// <summary>
/// Builds a suggester over "ab xc" and "ba xd" with an analyzer that emits a
/// canned synonym graph and expects exactly one suggestion for "ab x".
/// </summary>
[Test]
public void TestInputPathRequired()
{
    //  SynonymMap.Builder b = new SynonymMap.Builder(false);
    //  b.add(new CharsRef("ab"), new CharsRef("ba"), true);
    //  final SynonymMap map = b.build();

    // The Analyzer below mimics the functionality of the SynonymAnalyzer
    // using the above map, so that the suggest module does not need a dependency on the
    // synonym module

    Analyzer synonymLikeAnalyzer = new TestInputPathRequiredAnalyzer(this);

    Input[] keys =
    {
        new Input("ab xc", 50),
        new Input("ba xd", 50),
    };

    AnalyzingSuggester suggester = new AnalyzingSuggester(synonymLikeAnalyzer);
    suggester.Build(new InputArrayIterator(keys));

    IList<Lookup.LookupResult> hits = suggester.DoLookup("ab x", false, 1);
    assertTrue(hits.Count == 1);
}
| |
/// <summary>
/// Creates a token with the given term text, start and end offsets of 0, and
/// the supplied position increment and position length.
/// </summary>
/// <param name="term">Term text of the token.</param>
/// <param name="posInc">Position increment to assign.</param>
/// <param name="posLength">Position length to assign.</param>
/// <returns>The newly created token.</returns>
internal static Token NewToken(string term, int posInc, int posLength)
{
    return new Token(term, 0, 0)
    {
        PositionIncrement = posInc,
        PositionLength = posLength
    };
}
| |
/// <summary>
/// Wraps the given bytes in a <see cref="BinaryToken"/>.
/// </summary>
/// <param name="term">Term bytes of the token.</param>
/// <returns>The newly created binary token.</returns>
internal static BinaryToken NewToken(BytesRef term) => new BinaryToken(term);
| |
| /* |
| private void printTokens(final Analyzer analyzer, String input) throws IOException { |
| System.out.println("Tokens for " + input); |
| TokenStream ts = analyzer.tokenStream("", new StringReader(input)); |
| ts.reset(); |
| final TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); |
| final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); |
| final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class); |
| |
| while(ts.incrementToken()) { |
| termBytesAtt.fillBytesRef(); |
| System.out.println(String.format("%s,%s,%s", termBytesAtt.getBytesRef().utf8ToString(), posIncAtt.getPositionIncrement(), posLengthAtt.getPositionLength())); |
| } |
| ts.end(); |
| ts.close(); |
| } |
| */ |
| |
/// <summary>
/// Token stream components that ignore the supplied reader: the fourth access of
/// <see cref="TokenStream"/> yields the tokens "a b", every other access yields
/// the single token "a".
/// </summary>
internal class UsualTokenStreamComponents : TokenStreamComponents
{
    private readonly AnalyzingSuggesterTest outerInstance;

    // Number of times TokenStream has been accessed so far.
    internal int count;

    public UsualTokenStreamComponents(AnalyzingSuggesterTest outerInstance, Tokenizer tokenizer)
        : base(tokenizer)
    {
        this.outerInstance = outerInstance;
    }

    public override TokenStream TokenStream
    {
        get
        {
            // count is the zero-based call index, so 3 identifies the 4th call.
            bool isFourthCall = count == 3;
            count++;

            if (isFourthCall)
            {
                // 4th call only: "a b"
                return new CannedTokenStream(new Token[]
                {
                    NewToken("a", 1, 1),
                    NewToken("b", 1, 1),
                });
            }

            // every other call: just "a"
            return new CannedTokenStream(new Token[]
            {
                NewToken("a", 1, 1),
            });
        }
    }

    protected override void SetReader(TextReader reader)
    {
        // Intentionally empty: the canned streams are built without a reader.
    }
}
/// <summary>
/// Analyzer whose components are <see cref="UsualTokenStreamComponents"/>; it is
/// the analyzer returned by <c>GetUnusualAnalyzer()</c>.
/// </summary>
internal class UsualAnalyzer : Analyzer
{
    private readonly AnalyzingSuggesterTest outerInstance;

    public UsualAnalyzer(AnalyzingSuggesterTest outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        // A tokenizer is still created because the base components constructor takes one.
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStreamComponents components = new UsualTokenStreamComponents(outerInstance, source);
        return components;
    }
}
| |
/// <summary>
/// Creates the analyzer shared by the exact-first tests: a new
/// <see cref="UsualAnalyzer"/> bound to this test instance.
/// </summary>
private Analyzer GetUnusualAnalyzer() => new UsualAnalyzer(this);
| |
/// <summary>
/// With EXACT_FIRST enabled, looking up "x y" must return the exact match "x y"
/// first despite its low weight, followed by the other inputs by descending weight.
/// </summary>
[Test]
public void TestExactFirst()
{
    Analyzer a = GetUnusualAnalyzer();
    SuggesterOptions options = SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP;
    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true);

    Input[] inputs =
    {
        new Input("x y", 1),
        new Input("x y z", 3),
        new Input("x", 2),
        new Input("z z z", 20),
    };
    suggester.Build(new InputArrayIterator(inputs));

    //System.out.println("ALL: " + suggester.lookup("x y", false, 6));

    // Full expected ranking; each lookup must return a leading slice of it.
    string[] expectedKeys = { "x y", "z z z", "x y z", "x" };
    long[] expectedWeights = { 1, 20, 3, 2 };

    for (int topN = 1; topN < 6; topN++)
    {
        IList<Lookup.LookupResult> hits = suggester.DoLookup("x y", false, topN);
        //System.out.println("topN=" + topN + " " + results);

        int expectedCount = Math.Min(topN, 4);
        assertEquals(expectedCount, hits.Count);

        for (int rank = 0; rank < expectedCount; rank++)
        {
            assertEquals(expectedKeys[rank], hits[rank].Key);
            assertEquals(expectedWeights[rank], hits[rank].Value);
        }
    }
}
| |
/// <summary>
/// Without EXACT_FIRST, suggestions must come back purely by descending weight,
/// so the lowest-weight input "x y" is ranked last.
/// </summary>
[Test]
public void TestNonExactFirst()
{
    Analyzer a = GetUnusualAnalyzer();
    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, SuggesterOptions.PRESERVE_SEP, 256, -1, true);

    Input[] inputs =
    {
        new Input("x y", 1),
        new Input("x y z", 3),
        new Input("x", 2),
        new Input("z z z", 20),
    };
    suggester.Build(new InputArrayIterator(inputs));

    // Full expected ranking; each lookup must return a leading slice of it.
    string[] expectedKeys = { "z z z", "x y z", "x", "x y" };
    long[] expectedWeights = { 20, 3, 2, 1 };

    for (int topN = 1; topN < 6; topN++)
    {
        IList<Lookup.LookupResult> hits = suggester.DoLookup("p", false, topN);

        int expectedCount = Math.Min(topN, 4);
        assertEquals(expectedCount, hits.Count);

        for (int rank = 0; rank < expectedCount; rank++)
        {
            assertEquals(expectedKeys[rank], hits[rank].Key);
            assertEquals(expectedWeights[rank], hits[rank].Value);
        }
    }
}
| |
/// <summary>
/// Brute-force completion entry that holds the surface form separately from the
/// analyzed form, together with a weight and an optional payload.
/// </summary>
internal class TermFreq2 : IComparable<TermFreq2>
{
    public readonly string surfaceForm;
    public readonly string analyzedForm;
    public readonly long weight;
    public readonly BytesRef payload;

    public TermFreq2(string surfaceForm, string analyzedForm, long weight, BytesRef payload)
    {
        this.surfaceForm = surfaceForm;
        this.analyzedForm = analyzedForm;
        this.weight = weight;
        this.payload = payload;
    }

    /// <summary>
    /// Orders by analyzed form (ordinal, ascending) and then by weight (descending).
    /// Entries that tie on both are not expected: the tie branch trips a debug
    /// assertion before returning 0.
    /// </summary>
    public int CompareTo(TermFreq2 other)
    {
        int byAnalyzedForm = analyzedForm.CompareToOrdinal(other.analyzedForm);
        if (byAnalyzedForm != 0)
        {
            return byAnalyzedForm;
        }

        if (weight == other.weight)
        {
            Debug.Assert(false);
            return 0;
        }

        // Higher weight sorts first.
        return weight > other.weight ? -1 : 1;
    }

    /// <summary>Returns "surfaceForm/weight".</summary>
    public override string ToString() => surfaceForm + "/" + weight;
}
| |
/// <summary>
/// Tells whether <paramref name="ch"/> counts as a stop character, i.e. whether its
/// offset from 'a' is smaller than <paramref name="numStopChars"/>.
/// </summary>
/// <remarks>
/// Characters that sort before 'a' have a negative offset, so they also count as
/// stop characters whenever <paramref name="numStopChars"/> is non-negative.
/// </remarks>
/// <param name="ch">Character to classify.</param>
/// <param name="numStopChars">Number of letters, starting at 'a', treated as stop characters.</param>
/// <returns><c>true</c> if the character is a stop character.</returns>
internal static bool IsStopChar(char ch, int numStopChars)
{
    //System.out.println("IS? " + ch + ": " + (ch - 'a') + ": " + ((ch - 'a') < numStopChars));
    int offsetFromA = ch - 'a';
    return offsetFromA < numStopChars;
}
| |
/// <summary>
/// Like StopFilter: drops single-character tokens for which
/// <c>IsStopChar</c> returns true and, when holes are preserved, adds the
/// position increments of the dropped tokens to the next emitted token.
/// </summary>
internal sealed class TokenEater : TokenFilter
{
    private readonly IPositionIncrementAttribute posIncrAtt;
    private readonly ICharTermAttribute termAtt;
    private readonly int numStopChars;
    private readonly bool preserveHoles;

    // True until the first token after Reset() has been emitted.
    private bool first;

    public TokenEater(bool preserveHoles, TokenStream @in, int numStopChars)
        : base(@in)
    {
        this.preserveHoles = preserveHoles;
        this.numStopChars = numStopChars;
        this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        this.termAtt = AddAttribute<ICharTermAttribute>();
    }

    public override void Reset()
    {
        base.Reset();
        first = true;
    }

    public override bool IncrementToken()
    {
        int skipped = 0;
        while (m_input.IncrementToken())
        {
            bool isStopToken = termAtt.Length == 1 && IsStopChar(termAtt.Buffer[0], numStopChars);
            if (isStopToken)
            {
                // Swallow the token; remember its position only when holes are kept.
                if (preserveHoles)
                {
                    skipped += posIncrAtt.PositionIncrement;
                }
                continue;
            }

            int adjustedPosInc = posIncrAtt.PositionIncrement + skipped;
            if (first)
            {
                // first token having posinc=0 is illegal.
                if (adjustedPosInc == 0)
                {
                    adjustedPosInc = 1;
                }
                first = false;
            }

            posIncrAtt.PositionIncrement = adjustedPosInc;
            //System.out.println("RETURN term=" + termAtt + " numStopChars=" + numStopChars);
            return true;
        }

        return false;
    }
}
| |
/// <summary>
/// Whitespace-tokenizing analyzer that, when <c>numStopChars</c> is non-zero,
/// wraps the tokenizer in a <see cref="TokenEater"/>.
/// </summary>
internal class MockTokenEatingAnalyzer : Analyzer
{
    private int numStopChars;
    private bool preserveHoles;

    private readonly MockBytesAttributeFactory factory = new MockBytesAttributeFactory();

    public MockTokenEatingAnalyzer(int numStopChars, bool preserveHoles)
    {
        this.preserveHoles = preserveHoles;
        this.numStopChars = numStopChars;
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        MockTokenizer source = new MockTokenizer(factory, reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
        source.EnableChecks = true;

        // With no stop characters configured the tokenizer itself is the end of the chain.
        TokenStream sink = numStopChars != 0
            ? new TokenEater(preserveHoles, source, numStopChars)
            : (TokenStream)source;

        return new TokenStreamComponents(source, sink);
    }
}
| |
/// <summary>
/// Separator character (U+001F) that the tests in this class insert between
/// tokens when emulating the analyzed form of a key. Declared as a constant
/// because it is only ever read.
/// </summary>
private const char SEP = '\u001F';
| |
/// <summary>
/// Orders entries by weight (compared as <c>float</c>, descending) and breaks ties
/// by analyzed form (ordinal, ascending).
/// </summary>
internal class TestRandomComparer : IComparer<TermFreq2>
{
    public int Compare(TermFreq2 left, TermFreq2 right)
    {
        // The explicit float casts are kept so weights are compared at float precision.
        float leftWeight = (float)left.weight;
        float rightWeight = (float)right.weight;

        int byWeightDescending = rightWeight.CompareTo(leftWeight);
        return byWeightDescending != 0
            ? byWeightDescending
            : left.analyzedForm.CompareToOrdinal(right.analyzedForm);
    }
}
| |
/// <summary>
/// Randomized test: builds a suggester over random multi-token keys and, for every
/// proper prefix of every key, compares the lookup results with those of a
/// brute-force "slow completor" that emulates the analysis with string operations.
/// </summary>
[Test]
public void TestRandom()
{
    int numQueries = AtLeast(1000);

    List<TermFreq2> slowCompletor = new List<TermFreq2>();
    ISet<string> allPrefixes = new JCG.SortedSet<string>(StringComparer.Ordinal);
    ISet<string> seen = new JCG.HashSet<string>();

    bool doPayloads = Random.nextBoolean();

    // Only one of the two arrays is allocated, depending on whether payloads are used.
    Input[] keys = doPayloads ? null : new Input[numQueries];
    Input[] payloadKeys = doPayloads ? new Input[numQueries] : null;

    bool preserveSep = Random.nextBoolean();

    int numStopChars = Random.nextInt(10);
    bool preserveHoles = Random.nextBoolean();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: " + numQueries + " words; preserveSep=" + preserveSep + " numStopChars=" + numStopChars + " preserveHoles=" + preserveHoles);
    }

    for (int i = 0; i < numQueries; i++)
    {
        int numTokens = TestUtil.NextInt32(Random, 1, 4);
        string key;
        string analyzedKey;
        do
        {
            key = "";
            analyzedKey = "";
            bool lastRemoved = false;
            for (int token = 0; token < numTokens; token++)
            {
                // TODO: would be nice to fix this slowCompletor/comparer to
                // use full range, but we might lose some coverage too...
                string word;
                do
                {
                    word = TestUtil.RandomSimpleString(Random);
                }
                while (word.Length == 0);

                if (token > 0)
                {
                    key += " ";
                }
                if (preserveSep && analyzedKey.Length > 0 && analyzedKey[analyzedKey.Length - 1] != SEP)
                {
                    analyzedKey += SEP;
                }
                key += word;

                lastRemoved = word.Length == 1 && IsStopChar(word[0], numStopChars);
                if (!lastRemoved)
                {
                    analyzedKey += word;
                }
                else if (preserveSep && preserveHoles)
                {
                    // The removed stop token leaves a hole behind.
                    analyzedKey += SEP;
                }
            }

            analyzedKey = Regex.Replace(analyzedKey, "(^|" + SEP + ")" + SEP + "$", "");

            if (preserveSep && lastRemoved)
            {
                analyzedKey += SEP;
            }

            // Don't add same surface form more than once: retry until the key is new.
        }
        while (!seen.Add(key));

        for (int prefixLength = 1; prefixLength < key.Length; prefixLength++)
        {
            allPrefixes.Add(key.Substring(0, prefixLength));
        }

        // we can probably do Integer.MAX_VALUE here, but why worry.
        int weight = Random.nextInt(1 << 24);
        BytesRef payload;
        if (doPayloads)
        {
            byte[] payloadBytes = new byte[Random.nextInt(10)];
            Random.NextBytes(payloadBytes);
            payload = new BytesRef(payloadBytes);
            payloadKeys[i] = new Input(key, weight, payload);
        }
        else
        {
            keys[i] = new Input(key, weight);
            payload = null;
        }

        slowCompletor.Add(new TermFreq2(key, analyzedKey, weight, payload));
    }

    if (VERBOSE)
    {
        // Don't just sort original list, to avoid VERBOSE
        // altering the test:
        List<TermFreq2> sortedCopy = new List<TermFreq2>(slowCompletor);
        // LUCENENET NOTE: Must use TimSort because comparer is not expecting ties
        CollectionUtil.TimSort(sortedCopy);
        foreach (TermFreq2 entry in sortedCopy)
        {
            Console.WriteLine(" surface='" + entry.surfaceForm + "' analyzed='" + entry.analyzedForm + "' weight=" + entry.weight);
        }
    }

    Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a,
        preserveSep ? SuggesterOptions.PRESERVE_SEP : 0, 256, -1, true);
    suggester.Build(new InputArrayIterator(Shuffle(doPayloads ? payloadKeys : keys)));

    // Regex that matches one separator at the very end of the analyzed key.
    string trailingSepPattern = SEP + "$";

    foreach (string prefix in allPrefixes)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: prefix=" + prefix);
        }

        // 1. ask the suggester:
        int topN = TestUtil.NextInt32(Random, 1, 10);
        IList<Lookup.LookupResult> actual = suggester.DoLookup(TestUtil.StringToCharSequence(prefix, Random).ToString(), false, topN);

        // 2. go thru whole set to find suggestions:

        // "Analyze" the key:
        string[] tokens = prefix.Split(' ').TrimEnd();
        StringBuilder analyzed = new StringBuilder();
        bool lastRemoved = false;
        foreach (string token in tokens)
        {
            if (preserveSep && analyzed.Length > 0 && analyzed[analyzed.Length - 1] != SEP)
            {
                analyzed.Append(SEP);
            }

            lastRemoved = token.Length == 1 && IsStopChar(token[0], numStopChars);
            if (!lastRemoved)
            {
                analyzed.Append(token);
            }
            else if (preserveSep && preserveHoles)
            {
                analyzed.Append(SEP);
            }
        }

        string analyzedKey = analyzed.ToString();

        // Remove trailing sep/holes (TokenStream.end() does
        // not tell us any trailing holes, yet ... there is an
        // issue open for this):
        string stripped = Regex.Replace(analyzedKey, trailingSepPattern, "");
        while (!stripped.Equals(analyzedKey, StringComparison.Ordinal))
        {
            analyzedKey = stripped;
            stripped = Regex.Replace(analyzedKey, trailingSepPattern, "");
        }

        if (analyzedKey.Length == 0)
        {
            // Currently suggester can't suggest from the empty
            // string!  You get no results, not all results...
            continue;
        }

        if (preserveSep && (prefix.EndsWith(" ", StringComparison.Ordinal) || lastRemoved))
        {
            analyzedKey += SEP;
        }

        if (VERBOSE)
        {
            Console.WriteLine(" analyzed: " + analyzedKey);
        }

        // TODO: could be faster... but its slowCompletor for a reason
        List<TermFreq2> expected = slowCompletor.FindAll(
            candidate => candidate.analyzedForm.StartsWith(analyzedKey, StringComparison.Ordinal));

        assertTrue(numStopChars > 0 || expected.Count > 0);

        if (expected.Count > 1)
        {
            expected.Sort(new TestRandomComparer());
        }

        if (expected.Count > topN)
        {
            expected = expected.GetRange(0, topN);
        }

        if (VERBOSE)
        {
            Console.WriteLine(" expected:");
            foreach (TermFreq2 lr in expected)
            {
                Console.WriteLine(" key=" + lr.surfaceForm + " weight=" + lr.weight);
            }

            Console.WriteLine(" actual:");
            foreach (Lookup.LookupResult lr in actual)
            {
                Console.WriteLine(" key=" + lr.Key + " weight=" + lr.Value);
            }
        }

        assertEquals(expected.Count, actual.Count);

        for (int hit = 0; hit < actual.Count; hit++)
        {
            //System.out.println("  check hit " + hit);
            TermFreq2 want = expected[hit];
            Lookup.LookupResult got = actual[hit];
            assertEquals(want.surfaceForm, got.Key);
            assertEquals(want.weight, got.Value, 0f);
            if (doPayloads)
            {
                assertEquals(want.payload, got.Payload);
            }
        }
    }
}
| |
| |
| /// <summary> |
| /// LUCENENET specific test. Runs two fixed lookups against suggesters backed by |
| /// <c>MockTokenEatingAnalyzer</c> so that problems first seen in TestRandom() can be |
| /// debugged with known inputs. Each lookup is expected to return exactly one hit. |
| /// </summary> |
| [Test] |
| public void TestFixed() |
| { |
| bool preserveSep = true; |
| int numStopChars = 2; |
| bool preserveHoles = true; |
| var options = preserveSep ? SuggesterOptions.PRESERVE_SEP : 0; |
| const int topN = 4; |
| |
| // Scenario 1: prefix "ba" |
| Input[] firstInputs = |
| { |
| new Input("foo bar foo bar", 123456), |
| new Input("bar foo orange cat", 654321) |
| }; |
| Analyzer firstAnalyzer = new MockTokenEatingAnalyzer(numStopChars, preserveHoles); |
| AnalyzingSuggester firstSuggester = new AnalyzingSuggester(firstAnalyzer, firstAnalyzer, options, 256, -1, true); |
| firstSuggester.Build(new InputArrayIterator(firstInputs)); |
| |
| IList<Lookup.LookupResult> firstHits = firstSuggester.DoLookup("ba", false, topN); |
| assertEquals(1, firstHits.size()); |
| |
| Lookup.LookupResult firstHit = firstHits.ElementAt(0); |
| assertEquals("bar foo orange cat", firstHit.Key); |
| assertEquals(654321, firstHit.Value, 0f); |
| |
| // Scenario 2: prefix "pet" |
| Input[] secondInputs = |
| { |
| new Input("sally sells seashells by the sea shore", 1234567), |
| new Input("peter piper picked a pack of pickled peppers", 7654321) |
| }; |
| Analyzer secondAnalyzer = new MockTokenEatingAnalyzer(numStopChars, preserveHoles); |
| AnalyzingSuggester secondSuggester = new AnalyzingSuggester(secondAnalyzer, secondAnalyzer, options, 256, -1, true); |
| secondSuggester.Build(new InputArrayIterator(secondInputs)); |
| |
| IList<Lookup.LookupResult> secondHits = secondSuggester.DoLookup("pet", false, topN); |
| assertEquals(1, secondHits.size()); |
| |
| Lookup.LookupResult secondHit = secondHits.ElementAt(0); |
| assertEquals("peter piper picked a pack of pickled peppers", secondHit.Key); |
| assertEquals(7654321, secondHit.Value, 0f); |
| } |
| |
| |
| /// <summary> |
| /// Builds a suggester from three surface forms ("a", "a ", " a") and checks that a lookup |
| /// for "a" returns only the two heaviest ones, " a" (60) and then "a " (50). |
| /// </summary> |
| [Test] |
| public void TestMaxSurfaceFormsPerAnalyzedForm() |
| { |
| Analyzer analyzer = new MockAnalyzer(Random); |
| // NOTE(review): the 2 passed here is presumably the per-analyzed-form cap this test targets - confirm against the constructor. |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, 0, 2, -1, true); |
| |
| IEnumerable<Input> shuffledInputs = Shuffle( |
| new Input("a", 40), |
| new Input("a ", 50), |
| new Input(" a", 60)); |
| suggester.Build(new InputArrayIterator(shuffledInputs)); |
| |
| IList<Lookup.LookupResult> hits = suggester.DoLookup("a", false, 5); |
| assertEquals(2, hits.size()); |
| |
| Lookup.LookupResult best = hits.ElementAt(0); |
| assertEquals(" a", best.Key); |
| assertEquals(60, best.Value); |
| |
| Lookup.LookupResult runnerUp = hits.ElementAt(1); |
| assertEquals("a ", runnerUp.Key); |
| assertEquals(50, runnerUp.Value); |
| } |
| |
| /// <summary> |
| /// Smoke test: builds an EXACT_FIRST suggester from four inputs and asks for four results |
| /// for the key "a". There are no assertions on the result; the test passes when the |
| /// lookup completes without throwing. |
| /// </summary> |
| [Test] |
| public void TestQueueExhaustion() |
| { |
| Analyzer analyzer = new MockAnalyzer(Random); |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, SuggesterOptions.EXACT_FIRST, 256, -1, true); |
| |
| Input[] inputs = |
| { |
| new Input("a", 2), |
| new Input("a b c", 3), |
| new Input("a c a", 1), |
| new Input("a c b", 1) |
| }; |
| suggester.Build(new InputArrayIterator(inputs)); |
| |
| suggester.DoLookup("a", false, 4); |
| } |
| |
| /// <summary> |
| /// Builds an EXACT_FIRST suggester from three inputs and checks that a lookup for "a" |
| /// returns all three in the order "a" (5), "a c" (4), "a b" (3), both directly after |
| /// building and again after a Store/Load round trip through a temporary file. |
| /// </summary> |
| [Test] |
| public void TestExactFirstMissingResult() |
| { |
| Analyzer a = new MockAnalyzer(Random); |
| |
| AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, SuggesterOptions.EXACT_FIRST, 256, -1, true); |
| |
| suggester.Build(new InputArrayIterator(new Input[] { |
| new Input("a", 5), |
| new Input("a b", 3), |
| new Input("a c", 4), |
| })); |
| |
| assertEquals(3, suggester.Count); |
| IList<Lookup.LookupResult> results = suggester.DoLookup("a", false, 3); |
| assertEquals(3, results.size()); |
| assertEquals("a", results.ElementAt(0).Key); |
| assertEquals(5, results.ElementAt(0).Value); |
| assertEquals("a c", results.ElementAt(1).Key); |
| assertEquals(4, results.ElementAt(1).Value); |
| assertEquals("a b", results.ElementAt(2).Key); |
| assertEquals(3, results.ElementAt(2).Value); |
| |
| // Try again after save/load: |
| DirectoryInfo tmpDir = CreateTempDir("AnalyzingSuggesterTest"); |
| tmpDir.Create(); |
| |
| FileInfo path = new FileInfo(Path.Combine(tmpDir.FullName, "suggester")); |
| |
| // FileMode.Create truncates any pre-existing file, and the using block releases the |
| // file handle even when Store throws. |
| using (Stream os = new FileStream(path.FullName, FileMode.Create)) |
| { |
| suggester.Store(os); |
| } |
| |
| // Same guarantee for the read side: the handle is released even when Load throws. |
| using (Stream @is = new FileStream(path.FullName, FileMode.Open)) |
| { |
| suggester.Load(@is); |
| } |
| |
| assertEquals(3, suggester.Count); |
| results = suggester.DoLookup("a", false, 3); |
| assertEquals(3, results.size()); |
| assertEquals("a", results.ElementAt(0).Key); |
| assertEquals(5, results.ElementAt(0).Value); |
| assertEquals("a c", results.ElementAt(1).Key); |
| assertEquals(4, results.ElementAt(1).Value); |
| assertEquals("a b", results.ElementAt(2).Key); |
| assertEquals(3, results.ElementAt(2).Value); |
| } |
| |
| /// <summary> |
| /// Token stream components that always hand out the same canned stream made of the |
| /// tokens "hairy", "smelly" and "dog", regardless of the text being analyzed. |
| /// </summary> |
| internal class TestDupSurfaceFormsMissingResultsTokenStreamComponents : TokenStreamComponents |
| { |
| // Stored by the constructor; not read anywhere inside this class. |
| private readonly AnalyzingSuggesterTest outerInstance; |
| |
| public TestDupSurfaceFormsMissingResultsTokenStreamComponents(AnalyzingSuggesterTest outerInstance, Tokenizer tokenizer) |
| : base(tokenizer) |
| { |
| this.outerInstance = outerInstance; |
| } |
| |
| /// <summary>Returns a freshly created canned token stream on every access.</summary> |
| public override TokenStream TokenStream |
| { |
| get |
| { |
| Token[] canned = |
| { |
| NewToken("hairy", 1, 1), |
| NewToken("smelly", 0, 1), |
| NewToken("dog", 1, 1) |
| }; |
| return new CannedTokenStream(canned); |
| } |
| } |
| |
| protected override void SetReader(TextReader reader) |
| { |
| // Intentionally empty: the canned stream above is built without the reader. |
| } |
| } |
| |
| /// <summary> |
| /// Analyzer whose components are a |
| /// <see cref="TestDupSurfaceFormsMissingResultsTokenStreamComponents"/> wrapped around a |
| /// <c>MockTokenizer</c>. |
| /// </summary> |
| internal class TestDupSurfaceFormsMissingResultsAnalyzer : Analyzer |
| { |
| private readonly AnalyzingSuggesterTest outerInstance; |
| |
| public TestDupSurfaceFormsMissingResultsAnalyzer(AnalyzingSuggesterTest outerInstance) |
| { |
| this.outerInstance = outerInstance; |
| } |
| |
| protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) |
| { |
| return new TestDupSurfaceFormsMissingResultsTokenStreamComponents( |
| outerInstance, |
| new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); |
| } |
| } |
| |
| /// <summary> |
| /// Builds a suggester with <see cref="TestDupSurfaceFormsMissingResultsAnalyzer"/> from the |
| /// inputs "hambone" (6) and "nellie" (5) and checks that a lookup for "nellie" returns both, |
| /// "hambone" first, directly after building and again after a Store/Load round trip |
| /// through a temporary file. |
| /// </summary> |
| [Test] |
| public void TestDupSurfaceFormsMissingResults() |
| { |
| Analyzer a = new TestDupSurfaceFormsMissingResultsAnalyzer(this); |
| |
| AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true); |
| |
| suggester.Build(new InputArrayIterator(Shuffle( |
| new Input("hambone", 6), |
| new Input("nellie", 5)))); |
| |
| IList<Lookup.LookupResult> results = suggester.DoLookup("nellie", false, 2); |
| assertEquals(2, results.size()); |
| assertEquals("hambone", results.ElementAt(0).Key); |
| assertEquals(6, results.ElementAt(0).Value); |
| assertEquals("nellie", results.ElementAt(1).Key); |
| assertEquals(5, results.ElementAt(1).Value); |
| |
| // Try again after save/load: |
| DirectoryInfo tmpDir = CreateTempDir("AnalyzingSuggesterTest"); |
| tmpDir.Create(); |
| |
| FileInfo path = new FileInfo(Path.Combine(tmpDir.FullName, "suggester")); |
| |
| // FileMode.Create truncates any pre-existing file, and the using block releases the |
| // file handle even when Store throws. |
| using (Stream os = new FileStream(path.FullName, FileMode.Create)) |
| { |
| suggester.Store(os); |
| } |
| |
| // Same guarantee for the read side: the handle is released even when Load throws. |
| using (Stream @is = new FileStream(path.FullName, FileMode.Open)) |
| { |
| suggester.Load(@is); |
| } |
| |
| results = suggester.DoLookup("nellie", false, 2); |
| assertEquals(2, results.size()); |
| assertEquals("hambone", results.ElementAt(0).Key); |
| assertEquals(6, results.ElementAt(0).Value); |
| assertEquals("nellie", results.ElementAt(1).Key); |
| assertEquals(5, results.ElementAt(1).Value); |
| } |
| /// <summary> |
| /// Token stream components that serve canned tokens: the first access to |
| /// <see cref="TokenStream"/> yields the four tokens "p", "q", "r", "s"; every later access |
| /// yields the single token "p". |
| /// </summary> |
| internal class TestDupSurfaceFormsMissingResults2TokenStreamComponents : TokenStreamComponents |
| { |
| // Becomes 1 after the first stream has been handed out and is never incremented again. |
| internal int count; |
| |
| public TestDupSurfaceFormsMissingResults2TokenStreamComponents(Tokenizer tokenizer) |
| : base(tokenizer) |
| { |
| } |
| |
| public override TokenStream TokenStream |
| { |
| get |
| { |
| if (count != 0) |
| { |
| // Any access after the first one. |
| return new CannedTokenStream(new Token[] { NewToken("p", 1, 1) }); |
| } |
| |
| // First access only. |
| count++; |
| Token[] firstStreamTokens = |
| { |
| NewToken("p", 1, 1), |
| NewToken("q", 1, 1), |
| NewToken("r", 0, 1), |
| NewToken("s", 0, 1) |
| }; |
| return new CannedTokenStream(firstStreamTokens); |
| } |
| } |
| |
| protected override void SetReader(TextReader reader) |
| { |
| // Intentionally empty: the canned streams are built without the reader. |
| } |
| } |
| |
| /// <summary> |
| /// Analyzer whose components are a |
| /// <see cref="TestDupSurfaceFormsMissingResults2TokenStreamComponents"/> wrapped around a |
| /// <c>MockTokenizer</c>. |
| /// </summary> |
| internal class TestDupSurfaceFormsMissingResults2Analyzer : Analyzer |
| { |
| protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) |
| { |
| return new TestDupSurfaceFormsMissingResults2TokenStreamComponents( |
| new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); |
| } |
| } |
| |
| /// <summary> |
| /// Builds a suggester with <see cref="TestDupSurfaceFormsMissingResults2Analyzer"/> from the |
| /// inputs "a" (6) and "b" (5) and checks that a lookup for "a" returns both, "a" first, |
| /// directly after building and again after a Store/Load round trip through a temporary file. |
| /// </summary> |
| [Test] |
| public void TestDupSurfaceFormsMissingResults2() |
| { |
| Analyzer a = new TestDupSurfaceFormsMissingResults2Analyzer(); |
| |
| AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true); |
| |
| suggester.Build(new InputArrayIterator(new Input[] { |
| new Input("a", 6), |
| new Input("b", 5), |
| })); |
| |
| IList<Lookup.LookupResult> results = suggester.DoLookup("a", false, 2); |
| assertEquals(2, results.size()); |
| assertEquals("a", results.ElementAt(0).Key); |
| assertEquals(6, results.ElementAt(0).Value); |
| assertEquals("b", results.ElementAt(1).Key); |
| assertEquals(5, results.ElementAt(1).Value); |
| |
| // Try again after save/load: |
| DirectoryInfo tmpDir = CreateTempDir("AnalyzingSuggesterTest"); |
| tmpDir.Create(); |
| |
| FileInfo path = new FileInfo(Path.Combine(tmpDir.FullName, "suggester")); |
| |
| // FileMode.Create truncates any pre-existing file, and the using block releases the |
| // file handle even when Store throws. |
| using (Stream os = new FileStream(path.FullName, FileMode.Create)) |
| { |
| suggester.Store(os); |
| } |
| |
| // Same guarantee for the read side: the handle is released even when Load throws. |
| using (Stream @is = new FileStream(path.FullName, FileMode.Open)) |
| { |
| suggester.Load(@is); |
| } |
| |
| results = suggester.DoLookup("a", false, 2); |
| assertEquals(2, results.size()); |
| assertEquals("a", results.ElementAt(0).Key); |
| assertEquals(6, results.ElementAt(0).Value); |
| assertEquals("b", results.ElementAt(1).Key); |
| assertEquals(5, results.ElementAt(1).Value); |
| } |
| |
| /// <summary> |
| /// Token stream components that hand out four pre-built binary token streams in order. |
| /// Each stream holds a single token made only of zero bytes, alternating between three |
| /// and two bytes long. |
| /// </summary> |
| internal class Test0ByteKeysTokenStreamComponents : TokenStreamComponents |
| { |
| // Index of the stream returned by the next access to TokenStream. |
| internal int tokenStreamCounter = 0; |
| |
| internal TokenStream[] tokenStreams = new TokenStream[] |
| { |
| new CannedBinaryTokenStream(new BinaryToken[] { NewToken(new BytesRef(new byte[] { 0x0, 0x0, 0x0 })) }), |
| new CannedBinaryTokenStream(new BinaryToken[] { NewToken(new BytesRef(new byte[] { 0x0, 0x0 })) }), |
| new CannedBinaryTokenStream(new BinaryToken[] { NewToken(new BytesRef(new byte[] { 0x0, 0x0, 0x0 })) }), |
| new CannedBinaryTokenStream(new BinaryToken[] { NewToken(new BytesRef(new byte[] { 0x0, 0x0 })) }) |
| }; |
| |
| public Test0ByteKeysTokenStreamComponents(Tokenizer tokenizer) |
| : base(tokenizer) |
| { |
| } |
| |
| /// <summary>Returns the next pre-built stream and advances the counter.</summary> |
| public override TokenStream TokenStream |
| { |
| get |
| { |
| // Read first, advance afterwards: the counter stays untouched if the index is out of range. |
| int current = tokenStreamCounter; |
| TokenStream next = tokenStreams[current]; |
| tokenStreamCounter = current + 1; |
| return next; |
| } |
| } |
| |
| protected override void SetReader(TextReader reader) |
| { |
| // Intentionally empty: the pre-built streams are created without the reader. |
| } |
| } |
| |
| /// <summary> |
| /// Analyzer whose components are a <see cref="Test0ByteKeysTokenStreamComponents"/> |
| /// wrapped around a <c>MockTokenizer</c>. |
| /// </summary> |
| internal class Test0ByteKeysAnalyzer : Analyzer |
| { |
| protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) |
| { |
| return new Test0ByteKeysTokenStreamComponents( |
| new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); |
| } |
| } |
| |
| /// <summary> |
| /// Smoke test: builds a suggester with <see cref="Test0ByteKeysAnalyzer"/> from two inputs. |
| /// There are no assertions; the test passes when the build completes without throwing. |
| /// </summary> |
| [Test] |
| public void Test0ByteKeys() |
| { |
| Analyzer analyzer = new Test0ByteKeysAnalyzer(); |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, 0, 256, -1, true); |
| |
| Input[] inputs = |
| { |
| new Input("a a", 50), |
| new Input("a b", 50) |
| }; |
| suggester.Build(new InputArrayIterator(inputs)); |
| } |
| |
| /// <summary> |
| /// Builds a PRESERVE_SEP suggester from inputs that repeat the surface forms "a a" and |
| /// "a c", then checks that the top three results for "a" are "a a"/7, "a c"/6 and "a b"/5. |
| /// </summary> |
| [Test] |
| public void TestDupSurfaceFormsMissingResults3() |
| { |
| Analyzer analyzer = new MockAnalyzer(Random); |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, SuggesterOptions.PRESERVE_SEP, 256, -1, true); |
| |
| Input[] inputs = |
| { |
| new Input("a a", 7), |
| new Input("a a", 7), |
| new Input("a c", 6), |
| new Input("a c", 3), |
| new Input("a b", 5) |
| }; |
| suggester.Build(new InputArrayIterator(inputs)); |
| |
| IList<Lookup.LookupResult> hits = suggester.DoLookup("a", false, 3); |
| assertEquals("[a a/7, a c/6, a b/5]", hits.toString()); |
| } |
| |
| /// <summary> |
| /// Checks the effect of a trailing space in the lookup key on a PRESERVE_SEP suggester: |
| /// "i" returns both inputs, while "i " returns only "i love lucy". |
| /// </summary> |
| [Test] |
| public void TestEndingSpace() |
| { |
| Analyzer analyzer = new MockAnalyzer(Random); |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, SuggesterOptions.PRESERVE_SEP, 256, -1, true); |
| |
| Input[] inputs = |
| { |
| new Input("i love lucy", 7), |
| new Input("isla de muerta", 8) |
| }; |
| suggester.Build(new InputArrayIterator(inputs)); |
| |
| IList<Lookup.LookupResult> withoutSpace = suggester.DoLookup("i", false, 3); |
| assertEquals("[isla de muerta/8, i love lucy/7]", withoutSpace.toString()); |
| |
| IList<Lookup.LookupResult> withSpace = suggester.DoLookup("i ", false, 3); |
| assertEquals("[i love lucy/7]", withSpace.toString()); |
| } |
| |
| /// <summary> |
| /// Token stream components that always serve a canned two-token stream: "a" with |
| /// position increment 1 followed by "b" with position increment 0. |
| /// </summary> |
| internal class TestTooManyExpressionsTokenStreamComponents : TokenStreamComponents |
| { |
| public TestTooManyExpressionsTokenStreamComponents(Tokenizer tokenizer) |
| : base(tokenizer) |
| { |
| } |
| |
| /// <summary>Returns a freshly created canned token stream on every access.</summary> |
| public override TokenStream TokenStream |
| { |
| get |
| { |
| Token first = new Token("a", 0, 1) { PositionIncrement = 1 }; |
| Token stacked = new Token("b", 0, 1) { PositionIncrement = 0 }; |
| return new CannedTokenStream(new Token[] { first, stacked }); |
| } |
| } |
| |
| protected override void SetReader(TextReader reader) |
| { |
| // Intentionally empty: the canned stream is built without the reader. |
| } |
| } |
| /// <summary> |
| /// Analyzer whose components are a <see cref="TestTooManyExpressionsTokenStreamComponents"/> |
| /// wrapped around a <c>MockTokenizer</c>. |
| /// </summary> |
| internal class TestTooManyExpressionsAnalyzer : Analyzer |
| { |
| protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) |
| { |
| return new TestTooManyExpressionsTokenStreamComponents( |
| new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); |
| } |
| } |
| |
| /// <summary> |
| /// Builds a suggester with <see cref="TestTooManyExpressionsAnalyzer"/> from the single |
| /// input "a" and checks that a lookup for "a" still returns "[a/1]". |
| /// </summary> |
| [Test] |
| public void TestTooManyExpansions() |
| { |
| Analyzer analyzer = new TestTooManyExpressionsAnalyzer(); |
| // NOTE(review): the 1 passed here is presumably the expansion limit this test targets - confirm against the constructor. |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, 0, 256, 1, true); |
| |
| Input[] inputs = { new Input("a", 1) }; |
| suggester.Build(new InputArrayIterator(inputs)); |
| |
| IList<Lookup.LookupResult> hits = suggester.DoLookup("a", false, 1); |
| assertEquals("[a/1]", hits.toString()); |
| } |
| |
| /// <summary> |
| /// Checks that DoLookup rejects keys containing U+001E or U+001F by throwing an |
| /// <see cref="ArgumentException"/>. |
| /// </summary> |
| [Test] |
| public void TestIllegalLookupArgument() |
| { |
| Analyzer analyzer = new MockAnalyzer(Random); |
| AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, 0, 256, -1, true); |
| |
| Input[] inputs = { new Input("а где Люси?", 7) }; |
| suggester.Build(new InputArrayIterator(inputs)); |
| |
| // Both keys are tried in turn; each one has to fail on its own. |
| string[] illegalKeys = { "а\u001E", "а\u001F" }; |
| foreach (string illegalKey in illegalKeys) |
| { |
| try |
| { |
| suggester.DoLookup(illegalKey, false, 3); |
| fail("should throw IllegalArgumentException"); |
| } |
| catch (ArgumentException) |
| { |
| // expected |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Copies <paramref name="values"/> into a new list and returns that list in shuffled order. |
| /// The shuffle draws from the test framework's <c>Random</c> instead of the parameterless |
| /// overload's own source, so the input order follows the test run's random source. |
| /// </summary> |
| /// <param name="values">Inputs to shuffle; the array itself is left untouched.</param> |
| /// <returns>A new list holding every element of <paramref name="values"/>.</returns> |
| internal static IEnumerable<Input> Shuffle(params Input[] values) |
| { |
| IList<Input> asList = new List<Input>(values); |
| asList.Shuffle(Random); |
| return asList; |
| } |
| } |
| } |