// Exported from a web code viewer; original blob: 5d4107ffaf08c636760de38828e7125f0fac7209
using J2N;
using J2N.Collections.Generic.Extensions;
using J2N.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Diagnostics;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Util.Automaton;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using Console = Lucene.Net.Util.SystemConsole;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search.Suggest.Analyzing
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class FuzzySuggesterTest : LuceneTestCase
{
[Test]
public void TestRandomEdits()
{
    // Index many random "boo*" keys plus one fixed key, then verify that a
    // randomly edited query (outside the non-fuzzy prefix) still retrieves
    // exactly the fixed key.
    var keys = new List<Input>();
    int termCount = AtLeast(100);
    for (int term = 0; term < termCount; term++)
    {
        keys.Add(new Input("boo" + TestUtil.RandomSimpleString(Random), 1 + Random.Next(100)));
    }
    keys.Add(new Input("foo bar boo far", 12));

    MockAnalyzer analyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
    FuzzySuggester suggester = new FuzzySuggester(
        analyzer, analyzer,
        SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP,
        256, -1, true,
        FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
        0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
    suggester.Build(new InputArrayEnumerator(keys));

    int iterations = AtLeast(10);
    for (int iter = 0; iter < iterations; iter++)
    {
        string query = AddRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
        IList<Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence(query, Random).ToString(), false, 2);
        assertEquals(query, 1, results.size());
        assertEquals("foo bar boo far", results[0].Key.toString());
        assertEquals(12, results[0].Value, 0.01F);
    }
}
[Test]
public void TestNonLatinRandomEdits()
{
    // Cyrillic variant of TestRandomEdits with unicodeAware=true, so edits
    // are measured in code points rather than UTF-8 bytes.
    var keys = new List<Input>();
    int termCount = AtLeast(100);
    for (int term = 0; term < termCount; term++)
    {
        keys.Add(new Input("буу" + TestUtil.RandomSimpleString(Random), 1 + Random.nextInt(100)));
    }
    keys.Add(new Input("фуу бар буу фар", 12));

    MockAnalyzer analyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
    FuzzySuggester suggester = new FuzzySuggester(
        analyzer, analyzer,
        SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP,
        256, -1, true,
        FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
        0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
    suggester.Build(new InputArrayEnumerator(keys));

    int iterations = AtLeast(10);
    for (int iter = 0; iter < iterations; iter++)
    {
        // nonFuzzyPrefix = 0: the random edit may land anywhere in the query.
        string query = AddRandomEdit("фуу бар буу", 0);
        IList<Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence(query, Random).ToString(), false, 2);
        assertEquals(query, 1, results.size());
        assertEquals("фуу бар буу фар", results[0].Key.toString());
        assertEquals(12, results[0].Value, 0.01F);
    }
}
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
[Test]
public void TestKeyword()
{
    Input[] keys = new Input[] {
        new Input("foo", 50),
        new Input("bar", 10),
        new Input("barbar", 12),
        new Input("barbara", 6)
    };

    // Default suggester configuration over a keyword analyzer: the whole
    // input becomes a single token, so fuzziness applies to the full string.
    FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(Random, MockTokenizer.KEYWORD, false));
    suggester.Build(new InputArrayEnumerator(keys));

    // "bariar" is within edit distance of both "barbar" and "barbara":
    IList<Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence("bariar", Random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);

    results = suggester.DoLookup(TestUtil.StringToCharSequence("barbr", Random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);

    // Exact input "barbara" ranks first (exactFirst is on by default):
    results = suggester.DoLookup(TestUtil.StringToCharSequence("barbara", Random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbara", results[0].Key.toString());
    assertEquals(6, results[0].Value, 0.01F);

    results = suggester.DoLookup(TestUtil.StringToCharSequence("barbar", Random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);
    assertEquals("barbara", results[1].Key.toString());
    assertEquals(6, results[1].Value, 0.01F);

    results = suggester.DoLookup(TestUtil.StringToCharSequence("barbaa", Random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);
    assertEquals("barbara", results[1].Key.toString());
    assertEquals(6, results[1].Value, 0.01F);

    // top N of 2, but only foo is available
    results = suggester.DoLookup(TestUtil.StringToCharSequence("f", Random).ToString(), false, 2);
    assertEquals(1, results.size());
    assertEquals("foo", results[0].Key.toString());
    assertEquals(50, results[0].Value, 0.01F);

    // top N of 1 for 'bar': we return this even though
    // barbar is higher because exactFirst is enabled:
    results = suggester.DoLookup(TestUtil.StringToCharSequence("bar", Random).ToString(), false, 1);
    assertEquals(1, results.size());
    assertEquals("bar", results[0].Key.toString());
    assertEquals(10, results[0].Value, 0.01F);

    // top N Of 2 for 'b'
    results = suggester.DoLookup(TestUtil.StringToCharSequence("b", Random).ToString(), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);
    assertEquals("bar", results[1].Key.toString());
    assertEquals(10, results[1].Value, 0.01F);

    // top N of 3 for 'ba'
    results = suggester.DoLookup(TestUtil.StringToCharSequence("ba", Random).ToString(), false, 3);
    assertEquals(3, results.size());
    assertEquals("barbar", results[0].Key.toString());
    assertEquals(12, results[0].Value, 0.01F);
    assertEquals("bar", results[1].Key.toString());
    assertEquals(10, results[1].Value, 0.01F);
    assertEquals("barbara", results[2].Key.toString());
    assertEquals(6, results[2].Value, 0.01F);
}
/// <summary>
/// Basic "standard analyzer" test with stopword removal.
/// </summary>
[Test]
public void TestStandard()
{
    Input[] keys = { new Input("the ghost of christmas past", 50) };

    Analyzer standard = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    FuzzySuggester suggester = new FuzzySuggester(
        standard, standard,
        SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP,
        256, -1, false,
        FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
        FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
        FuzzySuggester.DEFAULT_UNICODE_AWARE);
    suggester.Build(new InputArrayEnumerator(keys));

    // The suggestion is found whether or not the stopwords ("the", "of")
    // appear in the query, since the analyzer removes them on both sides.
    foreach (string query in new[] { "the ghost of chris", "ghost of chris", "ghost chris" })
    {
        IList<Lookup.LookupResult> results = suggester.DoLookup(TestUtil.StringToCharSequence(query, Random).ToString(), false, 1);
        assertEquals(1, results.size());
        assertEquals("the ghost of christmas past", results[0].Key.toString());
        assertEquals(50, results[0].Value, 0.01F);
    }
}
[Test]
public void TestNoSeps()
{
    Input[] keys =
    {
        new Input("ab cd", 0),
        new Input("abcd", 1),
    };

    // No options: separators are NOT preserved in the analyzed form.
    Analyzer a = new MockAnalyzer(Random);
    FuzzySuggester suggester = new FuzzySuggester(a, a, (SuggesterOptions)0, 256, -1, true, 1, true, 1, 3, false);
    suggester.Build(new InputArrayEnumerator(keys));

    // TODO: would be nice if "ab " would allow the test to
    // pass, and more generally if the analyzer can know
    // that the user's current query has ended at a word,
    // but, analyzers don't produce SEP tokens!
    IList<Lookup.LookupResult> r = suggester.DoLookup(TestUtil.StringToCharSequence("ab c", Random).ToString(), false, 2);
    assertEquals(2, r.size());

    // With no PRESERVE_SEPS specified, "ab c" should also
    // complete to "abcd", which has higher weight so should
    // appear first:
    assertEquals("abcd", r[0].Key.toString());
}
internal class TestGraphDupsTokenStreamComponents : TokenStreamComponents
{
    private readonly FuzzySuggesterTest outerInstance;

    // Index of the next canned stream handed out by the TokenStream property.
    internal int tokenStreamCounter = 0;

    // Canned token graphs: "hotspot" overlaps "wifi" (posInc=0, posLength=2)
    // in the first stream and "wi fi" (posLength=3) in the second; the third
    // presumably serves the lookup in TestGraphDups — confirm against caller.
    internal readonly TokenStream[] tokenStreams = new TokenStream[] {
        new CannedTokenStream(new Token[] {
            NewToken("wifi",1,1),
            NewToken("hotspot",0,2),
            NewToken("network",1,1),
            NewToken("is",1,1),
            NewToken("slow",1,1)
        }),
        new CannedTokenStream(new Token[] {
            NewToken("wi",1,1),
            NewToken("hotspot",0,3),
            NewToken("fi",1,1),
            NewToken("network",1,1),
            NewToken("is",1,1),
            NewToken("fast",1,1)
        }),
        new CannedTokenStream(new Token[] {
            NewToken("wifi",1,1),
            NewToken("hotspot",0,2),
            NewToken("network",1,1)
        }),
    };

    public TestGraphDupsTokenStreamComponents(FuzzySuggesterTest outerInstance, Tokenizer tokenizer)
        : base(tokenizer)
    {
        this.outerInstance = outerInstance;
    }

    // Returns the canned streams in order, one per access, ignoring the
    // tokenizer passed to the constructor.
    public override TokenStream TokenStream
    {
        get
        {
            TokenStream result = tokenStreams[tokenStreamCounter];
            tokenStreamCounter++;
            return result;
        }
    }

    // No-op: the canned streams never read from the reader.
    protected internal override void SetReader(TextReader reader)
    {
    }
}
internal class TestGraphDupsAnalyzer : Analyzer
{
    private readonly FuzzySuggesterTest outerInstance;

    public TestGraphDupsAnalyzer(FuzzySuggesterTest outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    // Wires a SIMPLE tokenizer to components that replay canned token graphs
    // (see TestGraphDupsTokenStreamComponents); the tokenizer output itself
    // is never used.
    protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        var tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TestGraphDupsTokenStreamComponents(outerInstance, tokenizer);
    }
}
[Test]
public void TestGraphDups()
{
    // The canned analyzer emits a token graph where "hotspot" overlaps
    // "wifi" / "wi fi", so both surface forms should match "wifi network".
    Analyzer analyzer = new TestGraphDupsAnalyzer(this);

    Input[] keys =
    {
        new Input("wifi network is slow", 50),
        new Input("wi fi network is fast", 10),
    };
    FuzzySuggester suggester = new FuzzySuggester(analyzer);
    suggester.Build(new InputArrayEnumerator(keys));

    IList<Lookup.LookupResult> results = suggester.DoLookup("wifi network", false, 10);
    if (Verbose)
    {
        Console.WriteLine("Results: " + results);
    }

    // Higher weight first:
    assertEquals(2, results.size());
    assertEquals("wifi network is slow", results[0].Key);
    assertEquals(50, results[0].Value);
    assertEquals("wi fi network is fast", results[1].Key);
    assertEquals(10, results[1].Value);
}
[Test]
public void TestEmpty()
{
    // Building over zero inputs must succeed, and lookups must return nothing.
    FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(Random, MockTokenizer.KEYWORD, false));
    suggester.Build(new InputArrayEnumerator(new Input[0]));

    IList<Lookup.LookupResult> hits = suggester.DoLookup("a", false, 20);
    assertTrue(hits.Count == 0);
}
internal class TestInputPathRequiredTokenStreamComponents : TokenStreamComponents
{
    private readonly FuzzySuggesterTest outerInstance;

    // Index of the next canned stream handed out by the TokenStream property.
    internal int tokenStreamCounter = 0;

    // Canned streams mimicking a synonym graph: "ba" overlaps "ab"
    // (posInc=0), as a SynonymFilter with ab->ba would produce
    // (see the commented-out SynonymMap code in TestInputPathRequired).
    internal readonly TokenStream[] tokenStreams = new TokenStream[] {
        new CannedTokenStream(new Token[] {
            FuzzySuggesterTest.NewToken("ab",1,1),
            FuzzySuggesterTest.NewToken("ba",0,1),
            FuzzySuggesterTest.NewToken("xc",1,1)
        }),
        new CannedTokenStream(new Token[] {
            FuzzySuggesterTest.NewToken("ba",1,1),
            FuzzySuggesterTest.NewToken("xd",1,1)
        }),
        new CannedTokenStream(new Token[] {
            FuzzySuggesterTest.NewToken("ab",1,1),
            FuzzySuggesterTest.NewToken("ba",0,1),
            FuzzySuggesterTest.NewToken("x",1,1)
        })
    };

    public TestInputPathRequiredTokenStreamComponents(FuzzySuggesterTest outerInstance, Tokenizer tokenizer)
        : base(tokenizer)
    {
        this.outerInstance = outerInstance;
    }

    // Returns the canned streams in order, one per access, ignoring the
    // tokenizer passed to the constructor.
    public override TokenStream TokenStream
    {
        get
        {
            TokenStream result = tokenStreams[tokenStreamCounter];
            tokenStreamCounter++;
            return result;
        }
    }

    // No-op: the canned streams never read from the reader.
    protected internal override void SetReader(TextReader reader)
    {
    }
}
internal class TestInputPathRequiredAnalyzer : Analyzer
{
    private readonly FuzzySuggesterTest outerInstance;

    public TestInputPathRequiredAnalyzer(FuzzySuggesterTest outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    // Wires a SIMPLE tokenizer to components that replay canned synonym-like
    // token streams (see TestInputPathRequiredTokenStreamComponents).
    protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        var tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TestInputPathRequiredTokenStreamComponents(outerInstance, tokenizer);
    }
}
[Test]
public void TestInputPathRequired()
{
    //  SynonymMap.Builder b = new SynonymMap.Builder(false);
    //  b.add(new CharsRef("ab"), new CharsRef("ba"), true);
    //  final SynonymMap map = b.build();

    //  The Analyzer below mimics the functionality of the SynonymAnalyzer
    //  using the above map, so that the suggest module does not need a dependency on the
    //  synonym module

    Analyzer analyzer = new TestInputPathRequiredAnalyzer(this);

    Input[] keys =
    {
        new Input("ab xc", 50),
        new Input("ba xd", 50),
    };
    FuzzySuggester suggester = new FuzzySuggester(analyzer);
    suggester.Build(new InputArrayEnumerator(keys));

    IList<Lookup.LookupResult> results = suggester.DoLookup("ab x", false, 1);
    assertTrue(results.size() == 1);
}
/// <summary>
/// Creates a <c>Token</c> with zero start/end offsets and the given
/// position increment and position length.
/// </summary>
private static Token NewToken(string term, int posInc, int posLength)
{
    return new Token(term, 0, 0)
    {
        PositionIncrement = posInc,
        PositionLength = posLength
    };
}
/*
private void printTokens(final Analyzer analyzer, String input) throws IOException {
Console.WriteLine("Tokens for " + input);
TokenStream ts = analyzer.tokenStream("", new StringReader(input));
ts.reset();
final TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);
while(ts.incrementToken()) {
termBytesAtt.fillBytesRef();
Console.WriteLine(String.format("%s,%s,%s", termBytesAtt.getBytesRef().utf8ToString(), posIncAtt.getPositionIncrement(), posLengthAtt.getPositionLength()));
}
ts.end();
ts.close();
}
*/
internal class UsualTokenStreamComponents : TokenStreamComponents
{
    private readonly FuzzySuggesterTest outerInstance;

    // Number of times the TokenStream property has been accessed; the 4th
    // access (count == 3) switches the returned tokens. Tests using this
    // therefore depend on the exact number of analyses performed.
    internal int count;

    public UsualTokenStreamComponents(FuzzySuggesterTest outerInstance, Tokenizer tokenizer)
        : base(tokenizer)
    {
        this.outerInstance = outerInstance;
    }

    public override TokenStream TokenStream
    {
        get
        {
            // 4th time we are called, return tokens a b,
            // else just a:
            if (count++ != 3)
            {
                return new CannedTokenStream(new Token[] {
                    NewToken("a", 1, 1),
                });
            }
            else
            {
                // After that "a b":
                return new CannedTokenStream(new Token[] {
                    NewToken("a", 1, 1),
                    NewToken("b", 1, 1),
                });
            }
        }
    }

    // No-op: the canned streams never read from the reader.
    protected internal override void SetReader(TextReader reader)
    {
    }
}
internal class UsualAnalyzer : Analyzer
{
    private readonly FuzzySuggesterTest outerInstance;

    public UsualAnalyzer(FuzzySuggesterTest outerInstance)
    {
        this.outerInstance = outerInstance;
    }

    // Despite the name this analyzer is stateful: its components return a
    // different token stream on the 4th invocation
    // (see UsualTokenStreamComponents).
    protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        var tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new UsualTokenStreamComponents(outerInstance, tokenizer);
    }
}
// Returns the stateful analyzer whose 4th analysis yields "a b" instead of
// "a" (see UsualTokenStreamComponents) — intentionally unusual.
private Analyzer GetUnusualAnalyzer()
{
    return new UsualAnalyzer(this);
}
[Test]
public void TestExactFirst()
{
    // NOTE: the analyzer is stateful (the 4th TokenStream access returns
    // "a b" instead of "a" — see UsualTokenStreamComponents), so the exact
    // number and order of Build/DoLookup operations matters; do not reorder.
    Analyzer a = GetUnusualAnalyzer();
    FuzzySuggester suggester = new FuzzySuggester(a, a, SuggesterOptions.EXACT_FIRST | SuggesterOptions.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
    suggester.Build(new InputArrayEnumerator(new Input[] {
        new Input("x y", 1),
        new Input("x y z", 3),
        new Input("x", 2),
        new Input("z z z", 20),
    }));

    //Console.WriteLine("ALL: " + suggester.DoLookup("x y", false, 6));

    for (int topN = 1; topN < 6; topN++)
    {
        IList<Lookup.LookupResult> results = suggester.DoLookup("x y", false, topN);
        //Console.WriteLine("topN=" + topN + " " + results);

        assertEquals(Math.Min(topN, 4), results.size());

        // EXACT_FIRST: the exact match "x y" ranks first despite weight 1;
        // the rest follow in weight order.
        assertEquals("x y", results[0].Key);
        assertEquals(1, results[0].Value);

        if (topN > 1)
        {
            assertEquals("z z z", results[1].Key);
            assertEquals(20, results[1].Value);

            if (topN > 2)
            {
                assertEquals("x y z", results[2].Key);
                assertEquals(3, results[2].Value);

                if (topN > 3)
                {
                    assertEquals("x", results[3].Key);
                    assertEquals(2, results[3].Value);
                }
            }
        }
    }
}
[Test]
public void TestNonExactFirst()
{
    // NOTE: the analyzer is stateful (see UsualTokenStreamComponents), so
    // the exact number and order of Build/DoLookup operations matters.
    Analyzer a = GetUnusualAnalyzer();
    FuzzySuggester suggester = new FuzzySuggester(a, a, SuggesterOptions.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);

    suggester.Build(new InputArrayEnumerator(new Input[] {
        new Input("x y", 1),
        new Input("x y z", 3),
        new Input("x", 2),
        new Input("z z z", 20),
    }));

    for (int topN = 1; topN < 6; topN++)
    {
        IList<Lookup.LookupResult> results = suggester.DoLookup("p", false, topN);

        assertEquals(Math.Min(topN, 4), results.size());

        // Without EXACT_FIRST, results come back purely in descending
        // weight order:
        assertEquals("z z z", results[0].Key);
        assertEquals(20, results[0].Value);

        if (topN > 1)
        {
            assertEquals("x y z", results[1].Key);
            assertEquals(3, results[1].Value);

            if (topN > 2)
            {
                assertEquals("x", results[2].Key);
                assertEquals(2, results[2].Value);

                if (topN > 3)
                {
                    assertEquals("x y", results[3].Key);
                    assertEquals(1, results[3].Value);
                }
            }
        }
    }
}
// Holds surface form separately:
internal class TermFreqPayload2 : IComparable<TermFreqPayload2>
{
    public readonly string surfaceForm;
    public readonly string analyzedForm;
    public readonly long weight;

    public TermFreqPayload2(string surfaceForm, string analyzedForm, long weight)
    {
        this.surfaceForm = surfaceForm;
        this.analyzedForm = analyzedForm;
        this.weight = weight;
    }

    // Orders by analyzed form (ordinal), then by weight descending.
    // Exact ties are not expected by callers (hence the assert).
    public int CompareTo(TermFreqPayload2 other)
    {
        int byAnalyzed = analyzedForm.CompareToOrdinal(other.analyzedForm);
        if (byAnalyzed != 0)
        {
            return byAnalyzed;
        }
        if (weight != other.weight)
        {
            return weight > other.weight ? -1 : 1;
        }
        if (Debugging.AssertsEnabled) Debugging.Assert(false);
        return 0;
    }
}
// A character counts as a "stop char" when it falls within the first
// numStopChars letters starting at 'a'. Characters below 'a' also qualify,
// because the signed difference goes negative.
private static bool IsStopChar(char ch, int numStopChars)
{
    int offsetFromA = ch - 'a';
    return offsetFromA < numStopChars;
}
// Like StopFilter:
internal sealed class TokenEater : TokenFilter
{
    private readonly IPositionIncrementAttribute posIncrAtt;
    private readonly ICharTermAttribute termAtt;
    private readonly int numStopChars;
    private readonly bool preserveHoles;
    // True until the first token is emitted after a Reset().
    private bool first;

    // Removes single-character tokens that IsStopChar considers stop chars.
    // When preserveHoles is true, the position increments of removed tokens
    // are folded into the next kept token, leaving "holes" in the positions.
    public TokenEater(bool preserveHoles, TokenStream @in, int numStopChars)
        : base(@in)
    {
        this.preserveHoles = preserveHoles;
        this.numStopChars = numStopChars;
        this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        this.termAtt = AddAttribute<ICharTermAttribute>();
    }

    public override void Reset()
    {
        base.Reset();
        first = true;
    }

    public override sealed bool IncrementToken()
    {
        // Accumulates the positions of skipped (eaten) tokens since the
        // last kept token, when preserveHoles is on.
        int skippedPositions = 0;
        while (m_input.IncrementToken())
        {
            if (termAtt.Length != 1 || !IsStopChar(termAtt.Buffer[0], numStopChars))
            {
                int posInc = posIncrAtt.PositionIncrement + skippedPositions;
                if (first)
                {
                    if (posInc == 0)
                    {
                        // first token having posinc=0 is illegal.
                        posInc = 1;
                    }
                    first = false;
                }
                posIncrAtt.PositionIncrement = (posInc);
                //Console.WriteLine("RETURN term=" + termAtt + " numStopChars=" + numStopChars);
                return true;
            }
            if (preserveHoles)
            {
                skippedPositions += posIncrAtt.PositionIncrement;
            }
        }
        return false;
    }
}
internal class MockTokenEatingAnalyzer : Analyzer
{
    private readonly int numStopChars;
    private readonly bool preserveHoles;

    public MockTokenEatingAnalyzer(int numStopChars, bool preserveHoles)
    {
        this.numStopChars = numStopChars;
        this.preserveHoles = preserveHoles;
    }

    // Whitespace tokenizer, optionally wrapped with a TokenEater that removes
    // single-character "stop char" tokens (see IsStopChar).
    protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
        tokenizer.EnableChecks = true;

        // Only add the eater when there are stop characters to remove:
        TokenStream sink = numStopChars != 0
            ? (TokenStream)new TokenEater(preserveHoles, tokenizer, numStopChars)
            : tokenizer;
        return new TokenStreamComponents(tokenizer, sink);
    }
}
internal class TestRandomComparer : IComparer<Lookup.LookupResult>
{
    // Orders by weight descending (compared as float, to match the
    // suggester's scoring), breaking ties with LookupResult's natural order.
    public int Compare(Lookup.LookupResult left, Lookup.LookupResult right)
    {
        float leftWeight = (float)left.Value;
        float rightWeight = (float)right.Value;
        int byWeight = rightWeight.CompareTo(leftWeight);
        return byWeight != 0 ? byWeight : left.CompareTo(right);
    }
}
[Test]
[Slow]
public void TestRandom()
{
    // Randomized end-to-end check: build a suggester over random keys, then
    // for every prefix of every key compare the suggester's results against
    // a brute-force oracle that simulates the analyzer and walks the
    // suggester's own Levenshtein automaton.
    int numQueries = AtLeast(100);

    List<TermFreqPayload2> slowCompletor = new List<TermFreqPayload2>();
    JCG.SortedSet<string> allPrefixes = new JCG.SortedSet<string>(StringComparer.Ordinal);
    ISet<string> seen = new JCG.HashSet<string>();

    Input[] keys = new Input[numQueries];

    // Randomize the suggester configuration under test:
    bool preserveSep = Random.nextBoolean();
    bool unicodeAware = Random.nextBoolean();
    int numStopChars = Random.nextInt(10);
    bool preserveHoles = Random.nextBoolean();

    if (Verbose)
    {
        Console.WriteLine("TEST: " + numQueries + " words; preserveSep=" + preserveSep + " ; unicodeAware=" + unicodeAware + " numStopChars=" + numStopChars + " preserveHoles=" + preserveHoles);
    }

    for (int i = 0; i < numQueries; i++)
    {
        int numTokens = TestUtil.NextInt32(Random, 1, 4);

        string key;
        // What MockTokenEatingAnalyzer is expected to produce for this key:
        string analyzedKey;
        while (true)
        {
            key = "";
            analyzedKey = "";
            bool lastRemoved = false;
            for (int token = 0; token < numTokens; token++)
            {
                String s;
                while (true)
                {
                    // TODO: would be nice to fix this slowCompletor/comparer to
                    // use full range, but we might lose some coverage too...
                    s = TestUtil.RandomSimpleString(Random);
                    if (s.Length > 0)
                    {
                        if (token > 0)
                        {
                            key += " ";
                        }
                        if (preserveSep && analyzedKey.Length > 0 && (unicodeAware ? analyzedKey.CodePointAt(analyzedKey.CodePointCount(0, analyzedKey.Length) - 1) != ' ' : analyzedKey[analyzedKey.Length - 1] != ' '))
                        {
                            analyzedKey += " ";
                        }
                        key += s;
                        if (s.Length == 1 && IsStopChar(s[0], numStopChars))
                        {
                            // Stop token: removed by the analyzer, optionally
                            // leaving a \u0000 hole marker:
                            if (preserveSep && preserveHoles)
                            {
                                analyzedKey += '\u0000';
                            }
                            lastRemoved = true;
                        }
                        else
                        {
                            analyzedKey += s;
                            lastRemoved = false;
                        }
                        break;
                    }
                }
            }

            // Strip a trailing hole marker (and its preceding separator):
            analyzedKey = Regex.Replace(analyzedKey, "(^| )\u0000$", "");

            if (preserveSep && lastRemoved)
            {
                analyzedKey += " ";
            }

            // Don't add same surface form more than once:
            if (!seen.Contains(key))
            {
                seen.Add(key);
                break;
            }
        }

        for (int j = 1; j < key.Length; j++)
        {
            allPrefixes.Add(key.Substring(0, j - 0));
        }
        // we can probably do Integer.MAX_VALUE here, but why worry.
        int weight = Random.Next(1 << 24);
        keys[i] = new Input(key, weight);

        slowCompletor.Add(new TermFreqPayload2(key, analyzedKey, weight));
    }

    if (Verbose)
    {
        // Don't just sort original list, to avoid VERBOSE
        // altering the test:
        List<TermFreqPayload2> sorted = new List<TermFreqPayload2>(slowCompletor);
        // LUCENENET NOTE: Must use TimSort because comparer is not expecting ties
        CollectionUtil.TimSort(sorted);
        foreach (TermFreqPayload2 ent in sorted)
        {
            Console.WriteLine(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
        }
    }

    Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
    // maxEdits=1, no transpositions, nonFuzzyPrefix=1, minFuzzyLength=3:
    FuzzySuggester suggester = new FuzzySuggester(a, a,
        preserveSep ? SuggesterOptions.PRESERVE_SEP : 0, 256, -1, true, 1, false, 1, 3, unicodeAware);
    suggester.Build(new InputArrayEnumerator(keys));

    foreach (string prefix in allPrefixes)
    {
        if (Verbose)
        {
            Console.WriteLine("\nTEST: prefix=" + prefix);
        }

        int topN = TestUtil.NextInt32(Random, 1, 10);
        IList<Lookup.LookupResult> r = suggester.DoLookup(TestUtil.StringToCharSequence(prefix, Random).ToString(), false, topN);

        // 2. go thru whole set to find suggestions:
        List<Lookup.LookupResult> matches = new List<Lookup.LookupResult>();

        // "Analyze" the key:
        string[] tokens = prefix.Split(' ').TrimEnd();
        StringBuilder builder = new StringBuilder();
        bool lastRemoved = false;
        for (int i = 0; i < tokens.Length; i++)
        {
            string token = tokens[i];
            if (preserveSep && builder.Length > 0 && !builder.ToString().EndsWith(" ", StringComparison.Ordinal))
            {
                builder.Append(' ');
            }

            if (token.Length == 1 && IsStopChar(token[0], numStopChars))
            {
                if (preserveSep && preserveHoles)
                {
                    builder.Append("\u0000");
                }
                lastRemoved = true;
            }
            else
            {
                builder.Append(token);
                lastRemoved = false;
            }
        }

        string analyzedKey = builder.ToString();

        // Remove trailing sep/holes (TokenStream.end() does
        // not tell us any trailing holes, yet ... there is an
        // issue open for this):
        while (true)
        {
            string s = Regex.Replace(analyzedKey, "(^| )\u0000$", "");
            s = Regex.Replace(s, "\\s+$", "");
            if (s.Equals(analyzedKey, StringComparison.Ordinal))
            {
                break;
            }
            analyzedKey = s;
        }

        if (analyzedKey.Length == 0)
        {
            // Currently suggester can't suggest from the empty
            // string! You get no results, not all results...
            continue;
        }

        if (preserveSep && (prefix.EndsWith(" ", StringComparison.Ordinal) || lastRemoved))
        {
            analyzedKey += " ";
        }

        if (Verbose)
        {
            Console.WriteLine(" analyzed: " + analyzedKey);
        }
        TokenStreamToAutomaton tokenStreamToAutomaton = suggester.GetTokenStreamToAutomaton();

        // NOTE: not great that we ask the suggester to give
        // us the "answer key" (ie maybe we have a bug in
        // suggester.toLevA ...) ... but testRandom2() fixes
        // this:
        Automaton automaton = suggester.ConvertAutomaton(suggester.ToLevenshteinAutomata(suggester.ToLookupAutomaton(analyzedKey)));
        assertTrue(automaton.IsDeterministic);

        // TODO: could be faster... but its slowCompletor for a reason
        BytesRef spare = new BytesRef();
        foreach (TermFreqPayload2 e in slowCompletor)
        {
            spare.CopyChars(e.analyzedForm);
            ISet<Int32sRef> finiteStrings = suggester.ToFiniteStrings(spare, tokenStreamToAutomaton);
            foreach (Int32sRef intsRef in finiteStrings)
            {
                // Run each finite string through the Levenshtein automaton;
                // any accepting state reached along the way is a match.
                State p = automaton.GetInitialState();
                BytesRef @ref = Lucene.Net.Util.Fst.Util.ToBytesRef(intsRef, spare);
                bool added = false;
                for (int i = @ref.Offset; i < @ref.Length; i++)
                {
                    State q = p.Step(@ref.Bytes[i] & 0xff);
                    if (q == null)
                    {
                        break;
                    }
                    else if (q.Accept)
                    {
                        matches.Add(new Lookup.LookupResult(e.surfaceForm, e.weight));
                        added = true;
                        break;
                    }
                    p = q;
                }
                if (!added && p.Accept)
                {
                    matches.Add(new Lookup.LookupResult(e.surfaceForm, e.weight));
                }
            }
        }

        assertTrue(numStopChars > 0 || matches.size() > 0);

        if (matches.size() > 1)
        {
            matches.Sort(new TestRandomComparer());
        }

        if (matches.size() > topN)
        {
            matches = new List<Lookup.LookupResult>(matches.SubList(0, topN));
        }

        if (Verbose)
        {
            Console.WriteLine(" expected:");
            foreach (Lookup.LookupResult lr in matches)
            {
                Console.WriteLine(" key=" + lr.Key + " weight=" + lr.Value);
            }

            Console.WriteLine(" actual:");
            foreach (Lookup.LookupResult lr in r)
            {
                Console.WriteLine(" key=" + lr.Key + " weight=" + lr.Value);
            }
        }

        assertEquals(prefix + " " + topN, matches.size(), r.size());
        for (int hit = 0; hit < r.size(); hit++)
        {
            //Console.WriteLine(" check hit " + hit);
            assertEquals(prefix + " " + topN, matches[hit].Key.toString(), r[hit].Key.toString());
            assertEquals(matches[hit].Value, r[hit].Value, 0f);
        }
    }
}
[Test]
public void TestMaxSurfaceFormsPerAnalyzedForm()
{
    Analyzer a = new MockAnalyzer(Random);
    // maxSurfaceFormsPerAnalyzedForm = 2: only the two highest-weight
    // surface forms sharing the analyzed form "a" survive the build.
    FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, true, 1, true, 1, 3, false);

    IList<Input> keys = new Input[]
    {
        new Input("a", 40),
        new Input("a ", 50),
        new Input(" a", 60),
    };
    keys.Shuffle(Random);
    suggester.Build(new InputArrayEnumerator(keys));

    IList<Lookup.LookupResult> hits = suggester.DoLookup("a", false, 5);
    assertEquals(2, hits.Count);
    assertEquals(" a", hits[0].Key);
    assertEquals(60, hits[0].Value);
    assertEquals("a ", hits[1].Key);
    assertEquals(50, hits[1].Value);
}
[Test]
public void TestEditSeps()
{
    Analyzer a = new MockAnalyzer(Random);
    // maxEdits=2 with PRESERVE_SEP: an edit may insert or delete the
    // separator itself.
    FuzzySuggester suggester = new FuzzySuggester(a, a, SuggesterOptions.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);

    IList<Input> keys = new Input[]
    {
        new Input("foo bar", 40),
        new Input("foo bar baz", 50),
        new Input("barbaz", 60),
        new Input("barbazfoo", 10),
    };
    keys.Shuffle(Random);
    suggester.Build(new InputArrayEnumerator(keys));

    // Query missing a separator:
    assertEquals("[foo bar baz/50, foo bar/40]", suggester.DoLookup("foobar", false, 5).toString());
    assertEquals("[foo bar baz/50]", suggester.DoLookup("foobarbaz", false, 5).toString());
    // Query with extra separators:
    assertEquals("[barbaz/60, barbazfoo/10]", suggester.DoLookup("bar baz", false, 5).toString());
    assertEquals("[barbazfoo/10]", suggester.DoLookup("bar baz foo", false, 5).toString());
}
/// <summary>
/// Returns <paramref name="string"/> with at most one random edit —
/// transposition, deletion, duplicated insertion of the following char, or
/// insertion of a random ASCII char — applied at a random position at or
/// after <paramref name="prefixLength"/>. May return the input unchanged
/// (no position is picked, or only the last char remains).
/// </summary>
private static string AddRandomEdit(string @string, int prefixLength)
{
    char[] input = @string.ToCharArray();
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < input.Length; i++)
    {
        // Randomly decide whether to edit at this position (never the
        // last char, and never inside the non-fuzzy prefix):
        if (i >= prefixLength && Random.nextBoolean() && i < input.Length - 1)
        {
            switch (Random.nextInt(4))
            {
                case 3:
                    if (i < input.Length - 1)
                    {
                        // Transpose input[i] and input[1+i]:
                        builder.Append(input[i + 1]);
                        builder.Append(input[i]);
                        for (int j = i + 2; j < input.Length; j++)
                        {
                            builder.Append(input[j]);
                        }
                        return builder.ToString();
                    }
                    // NOTE: fall through to delete:
                    goto case 2;
                case 2:
                    // Delete input[i]
                    for (int j = i + 1; j < input.Length; j++)
                    {
                        builder.Append(input[j]);
                    }
                    return builder.ToString();
                case 1:
                    // Insert input[i+1] twice
                    if (i + 1 < input.Length)
                    {
                        builder.Append(input[i + 1]);
                        builder.Append(input[i++]);
                        i++;
                    }
                    // Copy the remainder verbatim:
                    for (int j = i; j < input.Length; j++)
                    {
                        builder.Append(input[j]);
                    }
                    return builder.ToString();
                case 0:
                    // Insert random byte.
                    // NOTE: can only use ascii here so that, in
                    // UTF8 byte space it's still a single
                    // insertion:
                    // bytes 0x1e and 0x1f are reserved
                    int x = Random.nextBoolean() ? Random.nextInt(30) : 32 + Random.nextInt(128 - 32);
                    builder.Append((char)x);
                    for (int j = i; j < input.Length; j++)
                    {
                        builder.Append(input[j]);
                    }
                    return builder.ToString();
            }
        }
        builder.Append(input[i]);
    }
    return builder.ToString();
}
// Returns a random string of length 1..maxLen drawn from the four-letter
// alphabet {a, b, c, d}.
private string RandomSimpleString(int maxLen)
{
    int length = TestUtil.NextInt32(Random, 1, maxLen);
    StringBuilder sb = new StringBuilder(length);
    for (int i = 0; i < length; i++)
    {
        sb.Append((char)('a' + Random.nextInt(4)));
    }
    return sb.ToString();
}
internal class TestRandom2Comparer : IComparer<Input>
{
    // Orders inputs by their term's natural (bytes) ordering.
    public int Compare(Input x, Input y)
    {
        return x.term.CompareTo(y.term);
    }
}
[Test]
public void TestRandom2()
{
    // Randomized check of the fuzzy matching itself: compare the suggester's
    // results against the brute-force SlowFuzzyMatch oracle.
    int NUM = AtLeast(200);
    List<Input> answers = new List<Input>();
    ISet<string> seen = new JCG.HashSet<string>();
    for (int i = 0; i < NUM; i++)
    {
        string s = RandomSimpleString(8);
        if (!seen.Contains(s))
        {
            answers.Add(new Input(s, Random.nextInt(1000)));
            seen.Add(s);
        }
    }

    answers.Sort(new TestRandom2Comparer());

    if (Verbose)
    {
        Console.WriteLine("\nTEST: targets");
        foreach (Input tf in answers)
        {
            Console.WriteLine(" " + tf.term.Utf8ToString() + " freq=" + tf.v);
        }
    }

    Analyzer a = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
    // Randomize the fuzzy parameters under test:
    int maxEdits = Random.nextBoolean() ? 1 : 2;
    int prefixLen = Random.nextInt(4);
    bool transpositions = Random.nextBoolean();
    // TODO: test graph analyzers
    // TODO: test exactFirst / preserveSep permutations
    FuzzySuggester suggest = new FuzzySuggester(a, a, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);

    if (Verbose)
    {
        Console.WriteLine("TEST: maxEdits=" + maxEdits + " prefixLen=" + prefixLen + " transpositions=" + transpositions + " num=" + NUM);
    }

    answers.Shuffle(Random);
    suggest.Build(new InputArrayEnumerator(answers.ToArray()));

    int ITERS = AtLeast(100);
    for (int iter = 0; iter < ITERS; iter++)
    {
        string frag = RandomSimpleString(6);
        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter frag=" + frag);
        }
        // The brute-force expectation:
        List<Lookup.LookupResult> expected = SlowFuzzyMatch(prefixLen, maxEdits, transpositions, answers, frag);
        if (Verbose)
        {
            Console.WriteLine(" expected: " + expected.size());
            foreach (Lookup.LookupResult c in expected)
            {
                Console.WriteLine(" " + c);
            }
        }
        List<Lookup.LookupResult> actual = new List<Lookup.LookupResult>(suggest.DoLookup(frag, false, NUM));
        if (Verbose)
        {
            Console.WriteLine(" actual: " + actual.size());
            foreach (Lookup.LookupResult c in actual)
            {
                Console.WriteLine(" " + c);
            }
        }

        // Sort both sides the same way before element-wise comparison:
        actual.Sort(new CompareByCostThenAlpha());

        int limit = Math.Min(expected.size(), actual.size());
        for (int ans = 0; ans < limit; ans++)
        {
            Lookup.LookupResult c0 = expected[ans];
            Lookup.LookupResult c1 = actual[ans];
            assertEquals("expected " + c0.Key +
                " but got " + c1.Key,
                0,
                CHARSEQUENCE_COMPARER.Compare(c0.Key, c1.Key));
            assertEquals(c0.Value, c1.Value);
        }

        assertEquals(expected.size(), actual.size());
    }
}
/// <summary>
/// Brute-force reference implementation of fuzzy suggest matching: for each
/// candidate in <paramref name="answers"/>, first verifies the required exact
/// prefix, then checks whether some prefix of the candidate term is within
/// <paramref name="maxEdits"/> edits of <paramref name="frag"/>.
/// Returns all matches sorted by descending weight, then by term.
/// </summary>
/// <param name="prefixLen">Number of leading bytes that must match exactly.</param>
/// <param name="maxEdits">Maximum allowed edit distance.</param>
/// <param name="allowTransposition">Whether a transposition counts as one edit.</param>
/// <param name="answers">All indexed inputs.</param>
/// <param name="frag">The query fragment.</param>
/// <returns>Matching results in weight-then-term order.</returns>
private List<Lookup.LookupResult> SlowFuzzyMatch(int prefixLen, int maxEdits, bool allowTransposition, List<Input> answers, string frag)
{
    List<Lookup.LookupResult> results = new List<Lookup.LookupResult>();
    int fragLen = frag.Length;
    foreach (Input tf in answers)
    {
        // The leading prefixLen bytes must match exactly; if the fragment ends
        // inside the prefix, that still counts as a prefix match:
        bool prefixMatches = true;
        for (int i = 0; i < prefixLen; i++)
        {
            if (i == fragLen)
            {
                // Prefix still matches:
                break;
            }
            if (i == tf.term.Length || tf.term.Bytes[i] != (byte)frag[i])
            {
                prefixMatches = false;
                break;
            }
        }
        if (prefixMatches)
        {
            int len = tf.term.Length;
            // A term shorter than fragLen - maxEdits can never get close enough:
            if (len >= fragLen - maxEdits)
            {
                int d;
                string s = tf.term.Utf8ToString();
                if (fragLen == prefixLen)
                {
                    // Query is exactly the required prefix; anything that passed
                    // the prefix check matches with distance 0:
                    d = 0;
                }
                else
                {
                    // NOTE: removed the original's dead `else if (false && len < fragLen)`
                    // branch (constant-false condition, unreachable by construction).
                    // Try every prefix of s whose length is within maxEdits of
                    // fragLen and take the first that is close enough:
                    d = maxEdits + 1;
                    for (int ed = -maxEdits; ed <= maxEdits; ed++)
                    {
                        if (s.Length < fragLen - ed)
                        {
                            continue;
                        }
                        string check = s.Substring(0, fragLen - ed);
                        d = GetDistance(frag, check, allowTransposition);
                        //Console.WriteLine("    sub check s=" + check + " d=" + d);
                        if (d <= maxEdits)
                        {
                            break;
                        }
                    }
                }
                if (d <= maxEdits)
                {
                    results.Add(new Lookup.LookupResult(tf.term.Utf8ToString(), tf.v));
                }
            }
        }
    }
    // BUG FIX: sort once after all candidates are collected. The original
    // called results.Sort(...) inside the foreach loop, re-sorting the whole
    // list on every candidate (accidental O(n^2 log n)); the final contents
    // and order are identical.
    results.Sort(new CompareByCostThenAlpha());
    return results;
}
/// <summary>
/// Ordinal string comparer: compares UTF-16 code units left to right and
/// breaks ties on length, so a prefix sorts before its extensions.
/// </summary>
internal class CharSequenceComparer : IComparer<string>
{
    public int Compare(string o1, string o2)
    {
        int shared = Math.Min(o1.Length, o2.Length);
        for (int i = 0; i < shared; i++)
        {
            int delta = o1[i] - o2[i];
            if (delta != 0)
            {
                return delta;
            }
        }
        // One is a prefix of the other, or they are equal: shorter sorts first.
        return o1.Length - o2.Length;
    }
}
private static readonly IComparer<string> CHARSEQUENCE_COMPARER = new CharSequenceComparer();
/// <summary>
/// Orders lookup results by descending weight, breaking ties by term
/// (via <c>CHARSEQUENCE_COMPARER</c>), matching the suggester's own ordering.
/// </summary>
public class CompareByCostThenAlpha : IComparer<Lookup.LookupResult>
{
    public int Compare(Lookup.LookupResult a, Lookup.LookupResult b)
    {
        // Higher weight sorts earlier:
        if (a.Value != b.Value)
        {
            return a.Value > b.Value ? -1 : 1;
        }
        // Equal weight: fall back to the term itself. Terms are unique here,
        // so the tie-break is expected to be decisive:
        int byKey = CHARSEQUENCE_COMPARER.Compare(a.Key, b.Key);
        if (Debugging.AssertsEnabled) Debugging.Assert(byKey != 0, () => "term=" + a.Key);
        return byKey;
    }
}
// NOTE: copied from
// modules/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java
// and tweaked to return the edit distance not the float
// lucene measure

/// <summary>
/// Finds the Unicode (code point) Levenshtein edit distance between
/// <paramref name="target"/> and <paramref name="other"/>, optionally counting
/// a transposition of two adjacent code points as a single edit.
/// </summary>
/// <param name="target">First string.</param>
/// <param name="other">Second string.</param>
/// <param name="allowTransposition">Whether a transposition counts as one edit.</param>
/// <returns>The edit distance as an integer number of edits.</returns>
public int GetDistance(string target, string other, bool allowTransposition)
{
    // Convert up front so edits are counted per code point, not per UTF-16 unit:
    Int32sRef targetPoints = ToIntsRef(target);
    Int32sRef otherPoints = ToIntsRef(other);
    int n = targetPoints.Length;
    int m = otherPoints.Length;

    // Degenerate cases: if either side is empty the distance is the other's length.
    if (n == 0 || m == 0)
    {
        return n == m ? 0 : Math.Max(n, m);
    }

    // Full (n+1) x (m+1) cost matrix. FIX: the original allocated this BEFORE
    // the empty-input check above, wasting the allocation on empty inputs.
    // NOTE: if we cared, we could use 3*m space instead of m*n space by cycling
    // through a ring of three rows, but this is test-only code.
    int[][] d = RectangularArrays.ReturnRectangularArray<int>(n + 1, m + 1);

    // Deleting/inserting i code points from/into the empty string costs i:
    for (int i = 0; i <= n; i++)
    {
        d[i][0] = i;
    }
    for (int j = 0; j <= m; j++)
    {
        d[0][j] = j;
    }

    for (int j = 1; j <= m; j++)
    {
        int t_j = otherPoints.Int32s[j - 1]; // jth code point of other
        for (int i = 1; i <= n; i++)
        {
            int cost = targetPoints.Int32s[i - 1] == t_j ? 0 : 1;
            // Minimum of cell to the left + 1, above + 1, and diagonal + cost:
            d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
            // Transposition: the two preceding code points are swapped:
            if (allowTransposition && i > 1 && j > 1 && targetPoints.Int32s[i - 1] == otherPoints.Int32s[j - 2] && targetPoints.Int32s[i - 2] == otherPoints.Int32s[j - 1])
            {
                d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }
    return d[n][m];
}
/// <summary>
/// Decodes <paramref name="s"/> into its sequence of Unicode code points,
/// advancing two UTF-16 units for surrogate pairs and one otherwise.
/// </summary>
private static Int32sRef ToIntsRef(string s)
{
    // Worst case: one code point per UTF-16 unit (no surrogate pairs).
    Int32sRef result = new Int32sRef(s.Length);
    int pos = 0;
    while (pos < s.Length)
    {
        int codePoint = Character.CodePointAt(s, pos);
        result.Int32s[result.Length++] = codePoint;
        pos += Character.CharCount(codePoint);
    }
    return result;
}
}
}