src/Lucene.Net.Tests.TestFramework/Analysis/TestMockAnalyzer.cs - lucenenet - Git at Google

 // Lucene version compatibility level 8.2.0
 using Lucene.Net.Support;
 using Lucene.Net.Util;
 using Lucene.Net.Util.Automaton;
 using System;
 using System.IO;
 using Lucene.Net.TestFramework;

 #if TESTFRAMEWORK_MSTEST
 using Test = Microsoft.VisualStudio.TestTools.UnitTesting.TestMethodAttribute;
 using Assert = Lucene.Net.TestFramework.Assert;
 #elif TESTFRAMEWORK_NUNIT
 using Test = NUnit.Framework.TestAttribute;
 using Assert = NUnit.Framework.Assert;
 #elif TESTFRAMEWORK_XUNIT
 using Test = Lucene.Net.TestFramework.SkippableFactAttribute;
 using Assert = Lucene.Net.TestFramework.Assert;
 #endif

 namespace Lucene.Net.Analysis
 {
     /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

 #if TESTFRAMEWORK_MSTEST
     [Microsoft.VisualStudio.TestTools.UnitTesting.TestClassAttribute]
 #endif
     // Exercises MockAnalyzer / MockTokenizer in several configurations: the built-in
     // tokenization patterns, regular-expression automata, token filters, a token
     // length limit and reader wrapping. Every analyzer created by a test is held in a
     // using declaration so it is disposed when the test ends, even if an assertion fails.
     public class TestMockAnalyzer : BaseTokenStreamTestCase
 #if TESTFRAMEWORK_XUNIT
         , Xunit.IClassFixture<BeforeAfterClass>
     {
         public TestMockAnalyzer(BeforeAfterClass beforeAfter)
             : base(beforeAfter)
         {
         }
 #else
     {
 #endif

         /// <summary>Test a configuration that behaves a lot like WhitespaceAnalyzer.</summary>
         [Test]
         public void TestWhitespace()
         {
             using Analyzer a = new MockAnalyzer(Random);

             // Expected tokens are split on whitespace and lowercased.
             AssertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
                 new string[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
             AssertAnalyzesTo(a, "aba cadaba shazam",
                 new string[] { "aba", "cadaba", "shazam" });
             AssertAnalyzesTo(a, "break on whitespace",
                 new string[] { "break", "on", "whitespace" });
         }

         /// <summary>Test a configuration that behaves a lot like SimpleAnalyzer.</summary>
         [Test]
         public void TestSimple()
         {
             using Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);

             // Expected tokens are the lowercased runs of letters; digits and punctuation separate them.
             AssertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
                 new string[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
             AssertAnalyzesTo(a, "aba4cadaba-Shazam",
                 new string[] { "aba", "cadaba", "shazam" });
             AssertAnalyzesTo(a, "break+on/Letters",
                 new string[] { "break", "on", "letters" });
         }

         /// <summary>Test a configuration that behaves a lot like KeywordAnalyzer.</summary>
         [Test]
         public void TestKeyword()
         {
             using Analyzer a = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);

             // The whole input, case and trailing whitespace included, is expected as one token.
             AssertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
                 new string[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
             AssertAnalyzesTo(a, "aba4cadaba-Shazam",
                 new string[] { "aba4cadaba-Shazam" });
             AssertAnalyzesTo(a, "break+on/Nothing",
                 new string[] { "break+on/Nothing" });

             // currently though emits no tokens for empty string: maybe we can do it,
             // but we don't want to emit tokens infinitely...
             AssertAnalyzesTo(a, "", new string[0]);
         }

         // Test some regular expressions as tokenization patterns

         /// <summary>Test a configuration where each character is a term.</summary>
         [Test]
         public void TestSingleChar()
         {
             CharacterRunAutomaton single =
                 new CharacterRunAutomaton(new RegExp(".").ToAutomaton());
             using Analyzer a = new MockAnalyzer(Random, single, false);

             AssertAnalyzesTo(a, "foobar",
                 new string[] { "f", "o", "o", "b", "a", "r" },
                 new int[] { 0, 1, 2, 3, 4, 5 },
                 new int[] { 1, 2, 3, 4, 5, 6 });
             CheckRandomData(Random, a, 100);
         }

         /// <summary>Test a configuration where two characters make a term.</summary>
         [Test]
         public void TestTwoChars()
         {
             CharacterRunAutomaton pairs =
                 new CharacterRunAutomaton(new RegExp("..").ToAutomaton());
             using Analyzer a = new MockAnalyzer(Random, pairs, false);

             AssertAnalyzesTo(a, "foobar",
                 new string[] { "fo", "ob", "ar" },
                 new int[] { 0, 2, 4 },
                 new int[] { 2, 4, 6 });

             // make sure when last term is a "partial" match that end() is correct
             AssertTokenStreamContents(a.GetTokenStream("bogus", "fooba"),
                 new string[] { "fo", "ob" },
                 new int[] { 0, 2 },
                 new int[] { 2, 4 },
                 new int[] { 1, 1 },
                 5);
             CheckRandomData(Random, a, 100);
         }

         /// <summary>Test a configuration where three characters make a term.</summary>
         [Test]
         public void TestThreeChars()
         {
             CharacterRunAutomaton triples =
                 new CharacterRunAutomaton(new RegExp("...").ToAutomaton());
             using Analyzer a = new MockAnalyzer(Random, triples, false);

             AssertAnalyzesTo(a, "foobar",
                 new string[] { "foo", "bar" },
                 new int[] { 0, 3 },
                 new int[] { 3, 6 });

             // make sure when last term is a "partial" match that end() is correct
             AssertTokenStreamContents(a.GetTokenStream("bogus", "fooba"),
                 new string[] { "foo" },
                 new int[] { 0 },
                 new int[] { 3 },
                 new int[] { 1 },
                 5);
             CheckRandomData(Random, a, 100);
         }

         /// <summary>Test a configuration where a word starts with one uppercase letter.</summary>
         [Test]
         public void TestUppercase()
         {
             CharacterRunAutomaton capitalized =
                 new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").ToAutomaton());
             using Analyzer a = new MockAnalyzer(Random, capitalized, false);

             AssertAnalyzesTo(a, "FooBarBAZ",
                 new string[] { "Foo", "Bar", "B", "A", "Z" },
                 new int[] { 0, 3, 6, 7, 8 },
                 new int[] { 3, 6, 7, 8, 9 });

             // The leading lowercase "a" cannot start a term, so the first token begins at offset 1.
             AssertAnalyzesTo(a, "aFooBar",
                 new string[] { "Foo", "Bar" },
                 new int[] { 1, 4 },
                 new int[] { 4, 7 });
             CheckRandomData(Random, a, 100);
         }

         /// <summary>Test a configuration that behaves a lot like StopAnalyzer.</summary>
         [Test]
         public void TestStop()
         {
             using Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);

             // "the" and "a" are expected to be removed from the output.
             AssertAnalyzesTo(a, "the quick brown a fox",
                 new string[] { "quick", "brown", "fox" },
                 new int[] { 2, 1, 2 });
         }

         /// <summary>Test a configuration that behaves a lot like KeepWordFilter.</summary>
         [Test]
         public void TestKeep()
         {
             // The filter automaton is the complement of ("foo" | "bar"); only "foo" and "bar"
             // are expected to remain in the output.
             CharacterRunAutomaton keepWords =
                 new CharacterRunAutomaton(
                     BasicOperations.Complement(
                         BasicOperations.Union(
                             BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")) /*,
                         Operations.DEFAULT_MAX_DETERMINIZED_STATES*/));
             using Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, keepWords);

             AssertAnalyzesTo(a, "quick foo brown bar bar fox foo",
                 new string[] { "foo", "bar", "bar", "foo" },
                 new int[] { 2, 2, 1, 2 });
         }

         /// <summary>Test a configuration that behaves a lot like LengthFilter.</summary>
         [Test]
         public void TestLength()
         {
             // Tokens of five or more characters match the filter automaton and are
             // expected to be dropped from the output.
             CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").ToAutomaton());
             using Analyzer a = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true, length5);

             AssertAnalyzesTo(a, "ok toolong fine notfine",
                 new string[] { "ok", "fine" },
                 new int[] { 1, 2 });
         }

         /// <summary>Test MockTokenizer encountering a too long token.</summary>
         [Test]
         public void TestTooLongToken()
         {
             // The tokenizer is built with a limit of 5; "toolong" is expected to be
             // emitted as "toolo" followed by "ng".
             using Analyzer whitespace = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
             {
                 Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, 5);
                 return new TokenStreamComponents(t, t);
             });

             AssertTokenStreamContents(whitespace.GetTokenStream("bogus", "test 123 toolong ok "),
                 new string[] { "test", "123", "toolo", "ng", "ok" },
                 new int[] { 0, 5, 9, 14, 17 },
                 new int[] { 4, 8, 14, 16, 19 },
                 20);

             AssertTokenStreamContents(whitespace.GetTokenStream("bogus", "test 123 toolo"),
                 new string[] { "test", "123", "toolo" },
                 new int[] { 0, 5, 9 },
                 new int[] { 4, 8, 14 },
                 14);
         }

         /// <summary>
         /// Consumes a one-character input manually, then checks that the same analyzer
         /// still analyzes that input correctly.
         /// </summary>
         [Test]
         public void TestLUCENE_3042()
         {
             string testString = "t";

             using Analyzer analyzer = new MockAnalyzer(Random);

             // The stream is disposed at the end of this block, before the analyzer is used again below.
             using (TokenStream stream = analyzer.GetTokenStream("dummy", testString))
             {
                 stream.Reset();
                 while (stream.IncrementToken())
                 {
                     // consume
                 }
                 stream.End();
             }

             AssertAnalyzesTo(analyzer, testString, new string[] { "t" });
         }

         /// <summary>Blast some random strings through the analyzer.</summary>
         [Test]
         public void TestRandomStrings()
         {
             using Analyzer a = new MockAnalyzer(Random);
             CheckRandomData(Random, a, AtLeast(1000));
         }

         /// <summary>Blast some random strings through differently configured tokenizers.</summary>
         [Test]
         public void TestRandomRegexps()
         {
             //int iters = TestNightly ? AtLeast(30) : AtLeast(1);
             // LUCENENET specific - reduced Nightly iterations from 30 to 15
             // to keep it under the 1 hour free limit of Azure DevOps
             int iters = TestNightly ? AtLeast(15) : AtLeast(1);
             for (int i = 0; i < iters; i++)
             {
                 CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.RandomAutomaton(Random) /*, int.MaxValue*/);
                 bool lowercase = Random.NextBoolean();
                 int limit = TestUtil.NextInt32(Random, 0, 500);

                 // A using declaration (instead of a trailing Dispose() call) releases the
                 // analyzer at the end of each iteration even when CheckRandomData throws.
                 using Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
                 {
                     Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
                     return new TokenStreamComponents(t, t);
                 });
                 CheckRandomData(Random, a, 100);
             }
         }

         /// <summary>
         /// Runs random HTML-ish strings through a <see cref="MockCharFilter"/> and a
         /// <see cref="MockAnalyzer"/>, consuming every token. The test contains no explicit
         /// assertions; it passes when nothing throws.
         /// </summary>
         [Test]
         public void TestForwardOffsets()
         {
             int num = AtLeast(1000);
             for (int i = 0; i < num; i++)
             {
                 string s = TestUtil.RandomHtmlishString(Random, 20);
                 StringReader reader = new StringReader(s);
                 MockCharFilter charfilter = new MockCharFilter(reader, 2);

                 // Using declarations are disposed in reverse order: the token stream
                 // first, then the analyzer, at the end of each iteration.
                 using MockAnalyzer analyzer = new MockAnalyzer(Random);
                 using TokenStream ts = analyzer.GetTokenStream("bogus", charfilter);
                 ts.Reset();
                 while (ts.IncrementToken())
                 {
                     // consume
                 }
                 ts.End();
             }
         }

         /// <summary>
         /// Analyzer wrapper that forwards to a fixed delegate analyzer and wraps every
         /// reader in a <see cref="MockCharFilter"/>.
         /// </summary>
         private class AnalyzerWrapperAnonymousClass : AnalyzerWrapper
         {
             private readonly Analyzer @delegate;

             /// <summary>Creates a wrapper that shares the reuse strategy of <paramref name="delegate"/>.</summary>
             /// <param name="delegate">The analyzer returned for every field.</param>
             public AnalyzerWrapperAnonymousClass(Analyzer @delegate)
                 : base(@delegate.Strategy)
             {
                 this.@delegate = @delegate;
             }

             /// <summary>Wraps <paramref name="reader"/> in a <see cref="MockCharFilter"/> constructed with the value 7.</summary>
             protected override TextReader WrapReader(string fieldName, TextReader reader)
             {
                 return new MockCharFilter(reader, 7);
             }

             /// <summary>Returns the delegate analyzer regardless of <paramref name="fieldName"/>.</summary>
             protected override Analyzer GetWrappedAnalyzer(string fieldName)
             {
                 return @delegate;
             }
         }

         /// <summary>LUCENE-5153: test that wrapping an analyzer's reader is allowed.</summary>
         [Test]
         public void TestWrapReader()
         {
             using Analyzer @delegate = new MockAnalyzer(Random);
             using Analyzer a = new AnalyzerWrapperAnonymousClass(@delegate);

             // With the reader wrapped, the input "abc" is expected to produce the single term "aabc".
             CheckOneTerm(a, "abc", "aabc");
         }

         // LUCENENET NOTE: This has some compatibility issues with Lucene 4.8.1, but need this test when
         // DelegatingAnalyzerWrapper is ported
         //[Test]
         //public void TestChangeGaps()
         //{
         //    // LUCENE-5324: check that it is possible to change the wrapper's gaps
         //     int positionGap = Random.nextInt(1000);
         //     int offsetGap = Random.nextInt(1000);
         //     Analyzer @delegate = new MockAnalyzer(Random);
         //// Analyzer a = new DelegatingAnalyzerWrapper(@delegate.getReuseStrategy()) {
         ////      @Override
         ////      protected Analyzer getWrappedAnalyzer(String fieldName)
         ////{
         ////    return @delegate;
         ////}
         ////@Override
         ////      public int getPositionIncrementGap(String fieldName)
         ////{
         ////    return positionGap;
         ////}
         ////@Override
         ////      public int getOffsetGap(String fieldName)
         ////{
         ////    return offsetGap;
         ////}
         ////    };

         //     RandomIndexWriter writer = new RandomIndexWriter(Random, NewDirectory(), a);
         // Document doc = new Document();
         // FieldType ft = new FieldType();
         //ft.IndexOptions=(IndexOptions.DOCS);
         //    ft.IsTokenized=(true);
         //    ft.setStoreTermVectors(true);
         //    ft.setStoreTermVectorPositions(true);
         //    ft.setStoreTermVectorOffsets(true);
         //    doc.add(new Field("f", "a", ft));
         //    doc.add(new Field("f", "a", ft));
         //    writer.addDocument(doc);
         //     LeafReader reader = getOnlyLeafReader(writer.getReader());
         // Fields fields = reader.getTermVectors(0);
         //     Terms terms = fields.terms("f");
         //     TermsEnum te = terms.iterator();
         //    assertEquals(new BytesRef("a"), te.next());
         //     PostingsEnum dpe = te.postings(null, PostingsEnum.ALL);
         //    assertEquals(0, dpe.nextDoc());
         //assertEquals(2, dpe.freq());
         //assertEquals(0, dpe.nextPosition());
         //assertEquals(0, dpe.startOffset());
         // int endOffset = dpe.endOffset();
         //assertEquals(1 + positionGap, dpe.nextPosition());
         //assertEquals(1 + endOffset + offsetGap, dpe.endOffset());
         //assertEquals(null, te.Next());
         //reader.close();
         //    writer.Dispose();
         //    writer.IndexWriter.Directory.Dispose();
         //  }
     }
 }
	// Lucene version compatibility level 8.2.0
	using Lucene.Net.Support;
	using Lucene.Net.Util;
	using Lucene.Net.Util.Automaton;
	using System;
	using System.IO;
	using Lucene.Net.TestFramework;

	#if TESTFRAMEWORK_MSTEST
	using Test = Microsoft.VisualStudio.TestTools.UnitTesting.TestMethodAttribute;
	using Assert = Lucene.Net.TestFramework.Assert;
	#elif TESTFRAMEWORK_NUNIT
	using Test = NUnit.Framework.TestAttribute;
	using Assert = NUnit.Framework.Assert;
	#elif TESTFRAMEWORK_XUNIT
	using Test = Lucene.Net.TestFramework.SkippableFactAttribute;
	using Assert = Lucene.Net.TestFramework.Assert;
	#endif

	namespace Lucene.Net.Analysis
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#if TESTFRAMEWORK_MSTEST
	[Microsoft.VisualStudio.TestTools.UnitTesting.TestClassAttribute]
	#endif
	// Exercises MockAnalyzer / MockTokenizer in several configurations: the built-in
	// tokenization patterns, regular-expression automata, token filters, a token
	// length limit and reader wrapping. Every analyzer created by a test is held in a
	// using declaration so it is disposed when the test ends, even if an assertion fails.
	public class TestMockAnalyzer : BaseTokenStreamTestCase
	#if TESTFRAMEWORK_XUNIT
	    , Xunit.IClassFixture<BeforeAfterClass>
	{
	    public TestMockAnalyzer(BeforeAfterClass beforeAfter)
	        : base(beforeAfter)
	    {
	    }
	#else
	{
	#endif

	    /// <summary>Test a configuration that behaves a lot like WhitespaceAnalyzer.</summary>
	    [Test]
	    public void TestWhitespace()
	    {
	        using Analyzer a = new MockAnalyzer(Random);

	        // Expected tokens are split on whitespace and lowercased.
	        AssertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
	            new string[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
	        AssertAnalyzesTo(a, "aba cadaba shazam",
	            new string[] { "aba", "cadaba", "shazam" });
	        AssertAnalyzesTo(a, "break on whitespace",
	            new string[] { "break", "on", "whitespace" });
	    }

	    /// <summary>Test a configuration that behaves a lot like SimpleAnalyzer.</summary>
	    [Test]
	    public void TestSimple()
	    {
	        using Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);

	        // Expected tokens are the lowercased runs of letters; digits and punctuation separate them.
	        AssertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
	            new string[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
	        AssertAnalyzesTo(a, "aba4cadaba-Shazam",
	            new string[] { "aba", "cadaba", "shazam" });
	        AssertAnalyzesTo(a, "break+on/Letters",
	            new string[] { "break", "on", "letters" });
	    }

	    /// <summary>Test a configuration that behaves a lot like KeywordAnalyzer.</summary>
	    [Test]
	    public void TestKeyword()
	    {
	        using Analyzer a = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);

	        // The whole input, case and trailing whitespace included, is expected as one token.
	        AssertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
	            new string[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
	        AssertAnalyzesTo(a, "aba4cadaba-Shazam",
	            new string[] { "aba4cadaba-Shazam" });
	        AssertAnalyzesTo(a, "break+on/Nothing",
	            new string[] { "break+on/Nothing" });

	        // currently though emits no tokens for empty string: maybe we can do it,
	        // but we don't want to emit tokens infinitely...
	        AssertAnalyzesTo(a, "", new string[0]);
	    }

	    // Test some regular expressions as tokenization patterns

	    /// <summary>Test a configuration where each character is a term.</summary>
	    [Test]
	    public void TestSingleChar()
	    {
	        CharacterRunAutomaton single =
	            new CharacterRunAutomaton(new RegExp(".").ToAutomaton());
	        using Analyzer a = new MockAnalyzer(Random, single, false);

	        AssertAnalyzesTo(a, "foobar",
	            new string[] { "f", "o", "o", "b", "a", "r" },
	            new int[] { 0, 1, 2, 3, 4, 5 },
	            new int[] { 1, 2, 3, 4, 5, 6 });
	        CheckRandomData(Random, a, 100);
	    }

	    /// <summary>Test a configuration where two characters make a term.</summary>
	    [Test]
	    public void TestTwoChars()
	    {
	        CharacterRunAutomaton pairs =
	            new CharacterRunAutomaton(new RegExp("..").ToAutomaton());
	        using Analyzer a = new MockAnalyzer(Random, pairs, false);

	        AssertAnalyzesTo(a, "foobar",
	            new string[] { "fo", "ob", "ar" },
	            new int[] { 0, 2, 4 },
	            new int[] { 2, 4, 6 });

	        // make sure when last term is a "partial" match that end() is correct
	        AssertTokenStreamContents(a.GetTokenStream("bogus", "fooba"),
	            new string[] { "fo", "ob" },
	            new int[] { 0, 2 },
	            new int[] { 2, 4 },
	            new int[] { 1, 1 },
	            5);
	        CheckRandomData(Random, a, 100);
	    }

	    /// <summary>Test a configuration where three characters make a term.</summary>
	    [Test]
	    public void TestThreeChars()
	    {
	        CharacterRunAutomaton triples =
	            new CharacterRunAutomaton(new RegExp("...").ToAutomaton());
	        using Analyzer a = new MockAnalyzer(Random, triples, false);

	        AssertAnalyzesTo(a, "foobar",
	            new string[] { "foo", "bar" },
	            new int[] { 0, 3 },
	            new int[] { 3, 6 });

	        // make sure when last term is a "partial" match that end() is correct
	        AssertTokenStreamContents(a.GetTokenStream("bogus", "fooba"),
	            new string[] { "foo" },
	            new int[] { 0 },
	            new int[] { 3 },
	            new int[] { 1 },
	            5);
	        CheckRandomData(Random, a, 100);
	    }

	    /// <summary>Test a configuration where a word starts with one uppercase letter.</summary>
	    [Test]
	    public void TestUppercase()
	    {
	        CharacterRunAutomaton capitalized =
	            new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").ToAutomaton());
	        using Analyzer a = new MockAnalyzer(Random, capitalized, false);

	        AssertAnalyzesTo(a, "FooBarBAZ",
	            new string[] { "Foo", "Bar", "B", "A", "Z" },
	            new int[] { 0, 3, 6, 7, 8 },
	            new int[] { 3, 6, 7, 8, 9 });

	        // The leading lowercase "a" cannot start a term, so the first token begins at offset 1.
	        AssertAnalyzesTo(a, "aFooBar",
	            new string[] { "Foo", "Bar" },
	            new int[] { 1, 4 },
	            new int[] { 4, 7 });
	        CheckRandomData(Random, a, 100);
	    }

	    /// <summary>Test a configuration that behaves a lot like StopAnalyzer.</summary>
	    [Test]
	    public void TestStop()
	    {
	        using Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);

	        // "the" and "a" are expected to be removed from the output.
	        AssertAnalyzesTo(a, "the quick brown a fox",
	            new string[] { "quick", "brown", "fox" },
	            new int[] { 2, 1, 2 });
	    }

	    /// <summary>Test a configuration that behaves a lot like KeepWordFilter.</summary>
	    [Test]
	    public void TestKeep()
	    {
	        // The filter automaton is the complement of ("foo" | "bar"); only "foo" and "bar"
	        // are expected to remain in the output.
	        CharacterRunAutomaton keepWords =
	            new CharacterRunAutomaton(
	                BasicOperations.Complement(
	                    BasicOperations.Union(
	                        BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")) /*,
	                    Operations.DEFAULT_MAX_DETERMINIZED_STATES*/));
	        using Analyzer a = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, keepWords);

	        AssertAnalyzesTo(a, "quick foo brown bar bar fox foo",
	            new string[] { "foo", "bar", "bar", "foo" },
	            new int[] { 2, 2, 1, 2 });
	    }

	    /// <summary>Test a configuration that behaves a lot like LengthFilter.</summary>
	    [Test]
	    public void TestLength()
	    {
	        // Tokens of five or more characters match the filter automaton and are
	        // expected to be dropped from the output.
	        CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").ToAutomaton());
	        using Analyzer a = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true, length5);

	        AssertAnalyzesTo(a, "ok toolong fine notfine",
	            new string[] { "ok", "fine" },
	            new int[] { 1, 2 });
	    }

	    /// <summary>Test MockTokenizer encountering a too long token.</summary>
	    [Test]
	    public void TestTooLongToken()
	    {
	        // The tokenizer is built with a limit of 5; "toolong" is expected to be
	        // emitted as "toolo" followed by "ng".
	        using Analyzer whitespace = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
	        {
	            Tokenizer t = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, 5);
	            return new TokenStreamComponents(t, t);
	        });

	        AssertTokenStreamContents(whitespace.GetTokenStream("bogus", "test 123 toolong ok "),
	            new string[] { "test", "123", "toolo", "ng", "ok" },
	            new int[] { 0, 5, 9, 14, 17 },
	            new int[] { 4, 8, 14, 16, 19 },
	            20);

	        AssertTokenStreamContents(whitespace.GetTokenStream("bogus", "test 123 toolo"),
	            new string[] { "test", "123", "toolo" },
	            new int[] { 0, 5, 9 },
	            new int[] { 4, 8, 14 },
	            14);
	    }

	    /// <summary>
	    /// Consumes a one-character input manually, then checks that the same analyzer
	    /// still analyzes that input correctly.
	    /// </summary>
	    [Test]
	    public void TestLUCENE_3042()
	    {
	        string testString = "t";

	        using Analyzer analyzer = new MockAnalyzer(Random);

	        // The stream is disposed at the end of this block, before the analyzer is used again below.
	        using (TokenStream stream = analyzer.GetTokenStream("dummy", testString))
	        {
	            stream.Reset();
	            while (stream.IncrementToken())
	            {
	                // consume
	            }
	            stream.End();
	        }

	        AssertAnalyzesTo(analyzer, testString, new string[] { "t" });
	    }

	    /// <summary>Blast some random strings through the analyzer.</summary>
	    [Test]
	    public void TestRandomStrings()
	    {
	        using Analyzer a = new MockAnalyzer(Random);
	        CheckRandomData(Random, a, AtLeast(1000));
	    }

	    /// <summary>Blast some random strings through differently configured tokenizers.</summary>
	    [Test]
	    public void TestRandomRegexps()
	    {
	        //int iters = TestNightly ? AtLeast(30) : AtLeast(1);
	        // LUCENENET specific - reduced Nightly iterations from 30 to 15
	        // to keep it under the 1 hour free limit of Azure DevOps
	        int iters = TestNightly ? AtLeast(15) : AtLeast(1);
	        for (int i = 0; i < iters; i++)
	        {
	            // The inline comment delimiters below were corrupted ("/, int.MaxValue/") and are restored.
	            CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.RandomAutomaton(Random) /*, int.MaxValue*/);
	            bool lowercase = Random.NextBoolean();
	            int limit = TestUtil.NextInt32(Random, 0, 500);

	            // A using declaration (instead of a trailing Dispose() call) releases the
	            // analyzer at the end of each iteration even when CheckRandomData throws.
	            using Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
	            {
	                Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
	                return new TokenStreamComponents(t, t);
	            });
	            CheckRandomData(Random, a, 100);
	        }
	    }

	    /// <summary>
	    /// Runs random HTML-ish strings through a <see cref="MockCharFilter"/> and a
	    /// <see cref="MockAnalyzer"/>, consuming every token. The test contains no explicit
	    /// assertions; it passes when nothing throws.
	    /// </summary>
	    [Test]
	    public void TestForwardOffsets()
	    {
	        int num = AtLeast(1000);
	        for (int i = 0; i < num; i++)
	        {
	            string s = TestUtil.RandomHtmlishString(Random, 20);
	            StringReader reader = new StringReader(s);
	            MockCharFilter charfilter = new MockCharFilter(reader, 2);

	            // Using declarations are disposed in reverse order: the token stream
	            // first, then the analyzer, at the end of each iteration.
	            using MockAnalyzer analyzer = new MockAnalyzer(Random);
	            using TokenStream ts = analyzer.GetTokenStream("bogus", charfilter);
	            ts.Reset();
	            while (ts.IncrementToken())
	            {
	                // consume
	            }
	            ts.End();
	        }
	    }

	    /// <summary>
	    /// Analyzer wrapper that forwards to a fixed delegate analyzer and wraps every
	    /// reader in a <see cref="MockCharFilter"/>.
	    /// </summary>
	    private class AnalyzerWrapperAnonymousClass : AnalyzerWrapper
	    {
	        private readonly Analyzer @delegate;

	        /// <summary>Creates a wrapper that shares the reuse strategy of <paramref name="delegate"/>.</summary>
	        /// <param name="delegate">The analyzer returned for every field.</param>
	        public AnalyzerWrapperAnonymousClass(Analyzer @delegate)
	            : base(@delegate.Strategy)
	        {
	            this.@delegate = @delegate;
	        }

	        /// <summary>Wraps <paramref name="reader"/> in a <see cref="MockCharFilter"/> constructed with the value 7.</summary>
	        protected override TextReader WrapReader(string fieldName, TextReader reader)
	        {
	            return new MockCharFilter(reader, 7);
	        }

	        /// <summary>Returns the delegate analyzer regardless of <paramref name="fieldName"/>.</summary>
	        protected override Analyzer GetWrappedAnalyzer(string fieldName)
	        {
	            return @delegate;
	        }
	    }

	    /// <summary>LUCENE-5153: test that wrapping an analyzer's reader is allowed.</summary>
	    [Test]
	    public void TestWrapReader()
	    {
	        using Analyzer @delegate = new MockAnalyzer(Random);
	        using Analyzer a = new AnalyzerWrapperAnonymousClass(@delegate);

	        // With the reader wrapped, the input "abc" is expected to produce the single term "aabc".
	        CheckOneTerm(a, "abc", "aabc");
	    }

	    // LUCENENET NOTE: This has some compatibility issues with Lucene 4.8.1, but need this test when
	    // DelegatingAnalyzerWrapper is ported
	    //[Test]
	    //public void TestChangeGaps()
	    //{
	    // // LUCENE-5324: check that it is possible to change the wrapper's gaps
	    // int positionGap = Random.nextInt(1000);
	    // int offsetGap = Random.nextInt(1000);
	    // Analyzer @delegate = new MockAnalyzer(Random);
	    //// Analyzer a = new DelegatingAnalyzerWrapper(@delegate.getReuseStrategy()) {
	    //// @Override
	    //// protected Analyzer getWrappedAnalyzer(String fieldName)
	    ////{
	    //// return @delegate;
	    ////}
	    ////@Override
	    //// public int getPositionIncrementGap(String fieldName)
	    ////{
	    //// return positionGap;
	    ////}
	    ////@Override
	    //// public int getOffsetGap(String fieldName)
	    ////{
	    //// return offsetGap;
	    ////}
	    //// };

	    // RandomIndexWriter writer = new RandomIndexWriter(Random, NewDirectory(), a);
	    // Document doc = new Document();
	    // FieldType ft = new FieldType();
	    //ft.IndexOptions=(IndexOptions.DOCS);
	    // ft.IsTokenized=(true);
	    // ft.setStoreTermVectors(true);
	    // ft.setStoreTermVectorPositions(true);
	    // ft.setStoreTermVectorOffsets(true);
	    // doc.add(new Field("f", "a", ft));
	    // doc.add(new Field("f", "a", ft));
	    // writer.addDocument(doc);
	    // LeafReader reader = getOnlyLeafReader(writer.getReader());
	    // Fields fields = reader.getTermVectors(0);
	    // Terms terms = fields.terms("f");
	    // TermsEnum te = terms.iterator();
	    // assertEquals(new BytesRef("a"), te.next());
	    // PostingsEnum dpe = te.postings(null, PostingsEnum.ALL);
	    // assertEquals(0, dpe.nextDoc());
	    //assertEquals(2, dpe.freq());
	    //assertEquals(0, dpe.nextPosition());
	    //assertEquals(0, dpe.startOffset());
	    // int endOffset = dpe.endOffset();
	    //assertEquals(1 + positionGap, dpe.nextPosition());
	    //assertEquals(1 + endOffset + offsetGap, dpe.endOffset());
	    //assertEquals(null, te.Next());
	    //reader.close();
	    // writer.Dispose();
	    // writer.IndexWriter.Directory.Dispose();
	    // }
	}
	}