// Source: src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/CommonGramsFilterTest.cs - lucenenet - Git at Google

 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
 using NUnit.Framework;
 using System.IO;

 namespace Lucene.Net.Analysis.CommonGrams
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// Tests CommonGrams(Query)Filter
     /// </summary>
     public class CommonGramsFilterTest : BaseTokenStreamTestCase
     {
         private static readonly CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "s", "a", "b", "c", "d", "the", "of" }, false);

         /// <summary>
         /// Reads the leading tokens from a CommonGramsFilter, disposes the stream,
         /// then points the tokenizer at a new reader and verifies that after
         /// Reset() the stream starts again from the first token.
         /// </summary>
         [Test]
         public virtual void TestReset()
         {
             const string text = "How the s a brown s cow d like A B thing?";
             var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
             var filter = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
             ICharTermAttribute termAtt = filter.AddAttribute<ICharTermAttribute>();

             // First pass: consume only the leading tokens, then dispose.
             filter.Reset();
             foreach (string expected in new[] { "How", "How_the", "the", "the_s" })
             {
                 assertTrue(filter.IncrementToken());
                 assertEquals(expected, termAtt.ToString());
             }
             filter.Dispose();

             // Second pass: same text through a fresh reader must start over at "How".
             tokenizer.SetReader(new StringReader(text));
             filter.Reset();
             assertTrue(filter.IncrementToken());
             assertEquals("How", termAtt.ToString());
         }

         /// <summary>
         /// Reads the leading tokens from a CommonGramsQueryFilter wrapped around a
         /// CommonGramsFilter, disposes the stream, then points the tokenizer at a
         /// new reader and verifies that after Reset() the first token is produced again.
         /// </summary>
         [Test]
         public virtual void TestQueryReset()
         {
             const string text = "How the s a brown s cow d like A B thing?";
             var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
             var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
             var queryFilter = new CommonGramsQueryFilter(grams);

             // The term attribute is requested from the tokenizer and read while
             // advancing the outermost filter.
             ICharTermAttribute termAtt = tokenizer.AddAttribute<ICharTermAttribute>();

             // First pass: consume only the leading tokens, then dispose.
             queryFilter.Reset();
             foreach (string expected in new[] { "How_the", "the_s" })
             {
                 assertTrue(queryFilter.IncrementToken());
                 assertEquals(expected, termAtt.ToString());
             }
             queryFilter.Dispose();

             // Second pass: same text through a fresh reader must start over at "How_the".
             tokenizer.SetReader(new StringReader(text));
             queryFilter.Reset();
             assertTrue(queryFilter.IncrementToken());
             assertEquals("How_the", termAtt.ToString());
         }

         /// <summary>
         /// Exercises CommonGramsQueryFilter, which outputs a set of tokens optimized
         /// for querying: only one token at each position, either a unigram or a
         /// bigram. It also does not return a token for the final position if the
         /// final word is already part of the preceding bigram.
         /// <para/>
         /// Example (three tokens/positions in, two tokens out):
         /// "quick brown the" is analyzed to "quick" "brown_the".
         /// </summary>
         [Test]
         public virtual void TestCommonGramsQueryFilter()
         {
             Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

             // In the inputs below "of", "the" and "s" are common (stop) words;
             // "n" and the remaining words are not.

             // --- two word queries ---
             AssertAnalyzesTo(analyzer, "brown fox", new[] { "brown", "fox" });
             AssertAnalyzesTo(analyzer, "the fox", new[] { "the_fox" });
             AssertAnalyzesTo(analyzer, "fox of", new[] { "fox_of" });
             AssertAnalyzesTo(analyzer, "of the", new[] { "of_the" });

             // --- one word queries ---
             AssertAnalyzesTo(analyzer, "the", new[] { "the" });
             AssertAnalyzesTo(analyzer, "foo", new[] { "foo" });

             // --- three word combinations (s = common word, n = not a common word) ---

             // n n n
             AssertAnalyzesTo(analyzer, "n n n", new[] { "n", "n", "n" });
             AssertAnalyzesTo(analyzer, "quick brown fox", new[] { "quick", "brown", "fox" });

             // n n s
             AssertAnalyzesTo(analyzer, "n n s", new[] { "n", "n_s" });
             AssertAnalyzesTo(analyzer, "quick brown the", new[] { "quick", "brown_the" });

             // n s n
             AssertAnalyzesTo(analyzer, "n s n", new[] { "n_s", "s_n" });
             AssertAnalyzesTo(analyzer, "quick the brown", new[] { "quick_the", "the_brown" });

             // n s s
             AssertAnalyzesTo(analyzer, "n s s", new[] { "n_s", "s_s" });
             AssertAnalyzesTo(analyzer, "fox of the", new[] { "fox_of", "of_the" });

             // s n n
             AssertAnalyzesTo(analyzer, "s n n", new[] { "s_n", "n", "n" });
             AssertAnalyzesTo(analyzer, "the quick brown", new[] { "the_quick", "quick", "brown" });

             // s n s
             AssertAnalyzesTo(analyzer, "s n s", new[] { "s_n", "n_s" });
             AssertAnalyzesTo(analyzer, "the fox of", new[] { "the_fox", "fox_of" });

             // s s n
             AssertAnalyzesTo(analyzer, "s s n", new[] { "s_s", "s_n" });
             AssertAnalyzesTo(analyzer, "of the fox", new[] { "of_the", "the_fox" });

             // s s s
             AssertAnalyzesTo(analyzer, "s s s", new[] { "s_s", "s_s" });
             AssertAnalyzesTo(analyzer, "of the of", new[] { "of_the", "the_of" });
         }

         /// <summary>
         /// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
         /// CommonGramsFilter (built with the shared commonWords set), which is in
         /// turn wrapped in a CommonGramsQueryFilter.
         /// </summary>
         private class AnalyzerAnonymousInnerClassHelper : Analyzer
         {
             // Reference to the enclosing test instance; stored but not read in this class.
             private readonly CommonGramsFilterTest outerInstance;

             public AnalyzerAnonymousInnerClassHelper(CommonGramsFilterTest outerInstance)
             {
                 this.outerInstance = outerInstance;
             }

             protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
             {
                 Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                 CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
                 CommonGramsQueryFilter queryGrams = new CommonGramsQueryFilter(grams);
                 return new TokenStreamComponents(source, queryGrams);
             }
         }

         /// <summary>
         /// Exercises CommonGramsFilter on one, two and three word inputs, checking
         /// the emitted terms and, for multi-word inputs, their position increments.
         /// In the expectations below each bigram carries a position increment of 0.
         /// </summary>
         [Test]
         public virtual void TestCommonGramsFilter()
         {
             Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

             // In the inputs below "of", "the" and "s" are common (stop) words;
             // "n" and the remaining words are not.

             // --- one word queries (terms only) ---
             AssertAnalyzesTo(analyzer, "the", new[] { "the" });
             AssertAnalyzesTo(analyzer, "foo", new[] { "foo" });

             // --- two word queries ---
             AssertAnalyzesTo(analyzer, "brown fox", new[] { "brown", "fox" }, new[] { 1, 1 });
             AssertAnalyzesTo(analyzer, "the fox", new[] { "the", "the_fox", "fox" }, new[] { 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "fox of", new[] { "fox", "fox_of", "of" }, new[] { 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "of the", new[] { "of", "of_the", "the" }, new[] { 1, 0, 1 });

             // --- three word combinations (s = common word, n = not a common word) ---

             // n n n
             AssertAnalyzesTo(analyzer, "n n n", new[] { "n", "n", "n" }, new[] { 1, 1, 1 });
             AssertAnalyzesTo(analyzer, "quick brown fox", new[] { "quick", "brown", "fox" }, new[] { 1, 1, 1 });

             // n n s
             AssertAnalyzesTo(analyzer, "n n s", new[] { "n", "n", "n_s", "s" }, new[] { 1, 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "quick brown the", new[] { "quick", "brown", "brown_the", "the" }, new[] { 1, 1, 0, 1 });

             // n s n
             AssertAnalyzesTo(analyzer, "n s n", new[] { "n", "n_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "quick the fox", new[] { "quick", "quick_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

             // n s s
             AssertAnalyzesTo(analyzer, "n s s", new[] { "n", "n_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "fox of the", new[] { "fox", "fox_of", "of", "of_the", "the" }, new[] { 1, 0, 1, 0, 1 });

             // s n n
             AssertAnalyzesTo(analyzer, "s n n", new[] { "s", "s_n", "n", "n" }, new[] { 1, 0, 1, 1 });
             AssertAnalyzesTo(analyzer, "the quick brown", new[] { "the", "the_quick", "quick", "brown" }, new[] { 1, 0, 1, 1 });

             // s n s
             AssertAnalyzesTo(analyzer, "s n s", new[] { "s", "s_n", "n", "n_s", "s" }, new[] { 1, 0, 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "the fox of", new[] { "the", "the_fox", "fox", "fox_of", "of" }, new[] { 1, 0, 1, 0, 1 });

             // s s n
             AssertAnalyzesTo(analyzer, "s s n", new[] { "s", "s_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "of the fox", new[] { "of", "of_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

             // s s s
             AssertAnalyzesTo(analyzer, "s s s", new[] { "s", "s_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
             AssertAnalyzesTo(analyzer, "of the of", new[] { "of", "of_the", "the", "the_of", "of" }, new[] { 1, 0, 1, 0, 1 });
         }

         /// <summary>
         /// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
         /// CommonGramsFilter built with the shared commonWords set.
         /// </summary>
         private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
         {
             // Reference to the enclosing test instance; stored but not read in this class.
             private readonly CommonGramsFilterTest outerInstance;

             public AnalyzerAnonymousInnerClassHelper2(CommonGramsFilterTest outerInstance)
             {
                 this.outerInstance = outerInstance;
             }

             protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
             {
                 Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                 CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
                 return new TokenStreamComponents(source, grams);
             }
         }

         /// <summary>
         /// Test that CommonGramsFilter matches common words case-sensitively.
         /// The shared commonWords set is constructed with its final argument set to
         /// false and contains only lower-case entries, so the capitalized "The",
         /// "A" and "B" in the input are not treated as common words: the expected
         /// output contains no "How_The", "like_A", "A_B" or "B_thing?" bigrams.
         /// "The_s" is still expected because the lower-case "s" that follows "The"
         /// is a common word.
         /// </summary>
         [Test]
         public virtual void TestCaseSensitive()
         {
             const string input = "How The s a brown s cow d like A B thing?";
             MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
             TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);

             // Bigrams appear only around the lower-case common words "s", "a" and "d".
             string[] expected =
             {
                 "How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
                 "s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
             };
             AssertTokenStreamContents(cgf, expected);
         }

         /// <summary>
         /// CommonGramsQueryFilter when the last word is a stop word:
         /// "dog the" must yield the single token "dog_the".
         /// </summary>
         [Test]
         public virtual void TestLastWordisStopWord()
         {
             var source = new MockTokenizer(new StringReader("dog the"), MockTokenizer.WHITESPACE, false);
             var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
             TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

             AssertTokenStreamContents(queryGrams, new[] { "dog_the" });
         }

         /// <summary>
         /// CommonGramsQueryFilter when the first word is a stop word:
         /// "the dog" must yield the single token "the_dog".
         /// </summary>
         [Test]
         public virtual void TestFirstWordisStopWord()
         {
             var source = new MockTokenizer(new StringReader("the dog"), MockTokenizer.WHITESPACE, false);
             var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
             TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

             AssertTokenStreamContents(queryGrams, new[] { "the_dog" });
         }

         /// <summary>
         /// CommonGramsQueryFilter on a query consisting of a single stop word:
         /// "the" must come through unchanged as "the".
         /// </summary>
         [Test]
         public virtual void TestOneWordQueryStopWord()
         {
             var source = new MockTokenizer(new StringReader("the"), MockTokenizer.WHITESPACE, false);
             var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
             TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

             AssertTokenStreamContents(queryGrams, new[] { "the" });
         }

         /// <summary>
         /// CommonGramsQueryFilter on a query consisting of a single word that is
         /// not a stop word: "monster" must come through unchanged as "monster".
         /// </summary>
         [Test]
         public virtual void TestOneWordQuery()
         {
             var source = new MockTokenizer(new StringReader("monster"), MockTokenizer.WHITESPACE, false);
             var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
             TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

             AssertTokenStreamContents(queryGrams, new[] { "monster" });
         }

         /// <summary>
         /// CommonGramsQueryFilter when both the first and the last word are stop
         /// words: "the of" must yield the single token "the_of".
         /// </summary>
         [Test]
         public virtual void TestFirstAndLastStopWord()
         {
             var source = new MockTokenizer(new StringReader("the of"), MockTokenizer.WHITESPACE, false);
             var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
             TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

             AssertTokenStreamContents(queryGrams, new[] { "the_of" });
         }

         /// <summary>
         /// Blasts random strings through two analyzers in turn: first the
         /// CommonGramsFilter chain, then the chain that additionally wraps it in a
         /// CommonGramsQueryFilter.
         /// </summary>
         [Test]
         public virtual void TestRandomStrings()
         {
             // Tokenizer followed by CommonGramsFilter only.
             Analyzer gramsAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
             CheckRandomData(Random, gramsAnalyzer, 1000 * RANDOM_MULTIPLIER);

             // Same chain with CommonGramsQueryFilter on top.
             Analyzer queryGramsAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this);
             CheckRandomData(Random, queryGramsAnalyzer, 1000 * RANDOM_MULTIPLIER);
         }

         /// <summary>
         /// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
         /// CommonGramsFilter built with the shared commonWords set.
         /// </summary>
         private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
         {
             // Reference to the enclosing test instance; stored but not read in this class.
             private readonly CommonGramsFilterTest outerInstance;

             public AnalyzerAnonymousInnerClassHelper3(CommonGramsFilterTest outerInstance)
             {
                 this.outerInstance = outerInstance;
             }

             protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
             {
                 Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                 return new TokenStreamComponents(source, new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords));
             }
         }

         /// <summary>
         /// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
         /// CommonGramsFilter (built with the shared commonWords set), which is in
         /// turn wrapped in a CommonGramsQueryFilter.
         /// </summary>
         private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
         {
             // Reference to the enclosing test instance; stored but not read in this class.
             private readonly CommonGramsFilterTest outerInstance;

             public AnalyzerAnonymousInnerClassHelper4(CommonGramsFilterTest outerInstance)
             {
                 this.outerInstance = outerInstance;
             }

             protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
             {
                 Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                 var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
                 var queryGrams = new CommonGramsQueryFilter(grams);
                 return new TokenStreamComponents(source, queryGrams);
             }
         }
     }
 }
	using Lucene.Net.Analysis.Core;
	using Lucene.Net.Analysis.TokenAttributes;
	using Lucene.Net.Analysis.Util;
	using NUnit.Framework;
	using System.IO;

	namespace Lucene.Net.Analysis.CommonGrams
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// Tests CommonGrams(Query)Filter
	/// </summary>
	public class CommonGramsFilterTest : BaseTokenStreamTestCase
	{
	private static readonly CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "s", "a", "b", "c", "d", "the", "of" }, false);

	/// <summary>
	/// Reads the leading tokens from a CommonGramsFilter, disposes the stream,
	/// then points the tokenizer at a new reader and verifies that after
	/// Reset() the stream starts again from the first token.
	/// </summary>
	[Test]
	public virtual void TestReset()
	{
		const string text = "How the s a brown s cow d like A B thing?";
		var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
		var filter = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
		ICharTermAttribute termAtt = filter.AddAttribute<ICharTermAttribute>();

		// First pass: consume only the leading tokens, then dispose.
		filter.Reset();
		foreach (string expected in new[] { "How", "How_the", "the", "the_s" })
		{
			assertTrue(filter.IncrementToken());
			assertEquals(expected, termAtt.ToString());
		}
		filter.Dispose();

		// Second pass: same text through a fresh reader must start over at "How".
		tokenizer.SetReader(new StringReader(text));
		filter.Reset();
		assertTrue(filter.IncrementToken());
		assertEquals("How", termAtt.ToString());
	}

	/// <summary>
	/// Reads the leading tokens from a CommonGramsQueryFilter wrapped around a
	/// CommonGramsFilter, disposes the stream, then points the tokenizer at a
	/// new reader and verifies that after Reset() the first token is produced again.
	/// </summary>
	[Test]
	public virtual void TestQueryReset()
	{
		const string text = "How the s a brown s cow d like A B thing?";
		var tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
		var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
		var queryFilter = new CommonGramsQueryFilter(grams);

		// The term attribute is requested from the tokenizer and read while
		// advancing the outermost filter.
		ICharTermAttribute termAtt = tokenizer.AddAttribute<ICharTermAttribute>();

		// First pass: consume only the leading tokens, then dispose.
		queryFilter.Reset();
		foreach (string expected in new[] { "How_the", "the_s" })
		{
			assertTrue(queryFilter.IncrementToken());
			assertEquals(expected, termAtt.ToString());
		}
		queryFilter.Dispose();

		// Second pass: same text through a fresh reader must start over at "How_the".
		tokenizer.SetReader(new StringReader(text));
		queryFilter.Reset();
		assertTrue(queryFilter.IncrementToken());
		assertEquals("How_the", termAtt.ToString());
	}

	/// <summary>
	/// Exercises CommonGramsQueryFilter, which outputs a set of tokens optimized
	/// for querying: only one token at each position, either a unigram or a
	/// bigram. It also does not return a token for the final position if the
	/// final word is already part of the preceding bigram.
	/// <para/>
	/// Example (three tokens/positions in, two tokens out):
	/// "quick brown the" is analyzed to "quick" "brown_the".
	/// </summary>
	[Test]
	public virtual void TestCommonGramsQueryFilter()
	{
		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);

		// In the inputs below "of", "the" and "s" are common (stop) words;
		// "n" and the remaining words are not.

		// --- two word queries ---
		AssertAnalyzesTo(analyzer, "brown fox", new[] { "brown", "fox" });
		AssertAnalyzesTo(analyzer, "the fox", new[] { "the_fox" });
		AssertAnalyzesTo(analyzer, "fox of", new[] { "fox_of" });
		AssertAnalyzesTo(analyzer, "of the", new[] { "of_the" });

		// --- one word queries ---
		AssertAnalyzesTo(analyzer, "the", new[] { "the" });
		AssertAnalyzesTo(analyzer, "foo", new[] { "foo" });

		// --- three word combinations (s = common word, n = not a common word) ---

		// n n n
		AssertAnalyzesTo(analyzer, "n n n", new[] { "n", "n", "n" });
		AssertAnalyzesTo(analyzer, "quick brown fox", new[] { "quick", "brown", "fox" });

		// n n s
		AssertAnalyzesTo(analyzer, "n n s", new[] { "n", "n_s" });
		AssertAnalyzesTo(analyzer, "quick brown the", new[] { "quick", "brown_the" });

		// n s n
		AssertAnalyzesTo(analyzer, "n s n", new[] { "n_s", "s_n" });
		AssertAnalyzesTo(analyzer, "quick the brown", new[] { "quick_the", "the_brown" });

		// n s s
		AssertAnalyzesTo(analyzer, "n s s", new[] { "n_s", "s_s" });
		AssertAnalyzesTo(analyzer, "fox of the", new[] { "fox_of", "of_the" });

		// s n n
		AssertAnalyzesTo(analyzer, "s n n", new[] { "s_n", "n", "n" });
		AssertAnalyzesTo(analyzer, "the quick brown", new[] { "the_quick", "quick", "brown" });

		// s n s
		AssertAnalyzesTo(analyzer, "s n s", new[] { "s_n", "n_s" });
		AssertAnalyzesTo(analyzer, "the fox of", new[] { "the_fox", "fox_of" });

		// s s n
		AssertAnalyzesTo(analyzer, "s s n", new[] { "s_s", "s_n" });
		AssertAnalyzesTo(analyzer, "of the fox", new[] { "of_the", "the_fox" });

		// s s s
		AssertAnalyzesTo(analyzer, "s s s", new[] { "s_s", "s_s" });
		AssertAnalyzesTo(analyzer, "of the of", new[] { "of_the", "the_of" });
	}

	/// <summary>
	/// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
	/// CommonGramsFilter (built with the shared commonWords set), which is in
	/// turn wrapped in a CommonGramsQueryFilter.
	/// </summary>
	private class AnalyzerAnonymousInnerClassHelper : Analyzer
	{
		// Reference to the enclosing test instance; stored but not read in this class.
		private readonly CommonGramsFilterTest outerInstance;

		public AnalyzerAnonymousInnerClassHelper(CommonGramsFilterTest outerInstance)
		{
			this.outerInstance = outerInstance;
		}

		protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
		{
			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
			CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
			CommonGramsQueryFilter queryGrams = new CommonGramsQueryFilter(grams);
			return new TokenStreamComponents(source, queryGrams);
		}
	}

	/// <summary>
	/// Exercises CommonGramsFilter on one, two and three word inputs, checking
	/// the emitted terms and, for multi-word inputs, their position increments.
	/// In the expectations below each bigram carries a position increment of 0.
	/// </summary>
	[Test]
	public virtual void TestCommonGramsFilter()
	{
		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

		// In the inputs below "of", "the" and "s" are common (stop) words;
		// "n" and the remaining words are not.

		// --- one word queries (terms only) ---
		AssertAnalyzesTo(analyzer, "the", new[] { "the" });
		AssertAnalyzesTo(analyzer, "foo", new[] { "foo" });

		// --- two word queries ---
		AssertAnalyzesTo(analyzer, "brown fox", new[] { "brown", "fox" }, new[] { 1, 1 });
		AssertAnalyzesTo(analyzer, "the fox", new[] { "the", "the_fox", "fox" }, new[] { 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "fox of", new[] { "fox", "fox_of", "of" }, new[] { 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "of the", new[] { "of", "of_the", "the" }, new[] { 1, 0, 1 });

		// --- three word combinations (s = common word, n = not a common word) ---

		// n n n
		AssertAnalyzesTo(analyzer, "n n n", new[] { "n", "n", "n" }, new[] { 1, 1, 1 });
		AssertAnalyzesTo(analyzer, "quick brown fox", new[] { "quick", "brown", "fox" }, new[] { 1, 1, 1 });

		// n n s
		AssertAnalyzesTo(analyzer, "n n s", new[] { "n", "n", "n_s", "s" }, new[] { 1, 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "quick brown the", new[] { "quick", "brown", "brown_the", "the" }, new[] { 1, 1, 0, 1 });

		// n s n
		AssertAnalyzesTo(analyzer, "n s n", new[] { "n", "n_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "quick the fox", new[] { "quick", "quick_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

		// n s s
		AssertAnalyzesTo(analyzer, "n s s", new[] { "n", "n_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "fox of the", new[] { "fox", "fox_of", "of", "of_the", "the" }, new[] { 1, 0, 1, 0, 1 });

		// s n n
		AssertAnalyzesTo(analyzer, "s n n", new[] { "s", "s_n", "n", "n" }, new[] { 1, 0, 1, 1 });
		AssertAnalyzesTo(analyzer, "the quick brown", new[] { "the", "the_quick", "quick", "brown" }, new[] { 1, 0, 1, 1 });

		// s n s
		AssertAnalyzesTo(analyzer, "s n s", new[] { "s", "s_n", "n", "n_s", "s" }, new[] { 1, 0, 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "the fox of", new[] { "the", "the_fox", "fox", "fox_of", "of" }, new[] { 1, 0, 1, 0, 1 });

		// s s n
		AssertAnalyzesTo(analyzer, "s s n", new[] { "s", "s_s", "s", "s_n", "n" }, new[] { 1, 0, 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "of the fox", new[] { "of", "of_the", "the", "the_fox", "fox" }, new[] { 1, 0, 1, 0, 1 });

		// s s s
		AssertAnalyzesTo(analyzer, "s s s", new[] { "s", "s_s", "s", "s_s", "s" }, new[] { 1, 0, 1, 0, 1 });
		AssertAnalyzesTo(analyzer, "of the of", new[] { "of", "of_the", "the", "the_of", "of" }, new[] { 1, 0, 1, 0, 1 });
	}

	/// <summary>
	/// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
	/// CommonGramsFilter built with the shared commonWords set.
	/// </summary>
	private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
	{
		// Reference to the enclosing test instance; stored but not read in this class.
		private readonly CommonGramsFilterTest outerInstance;

		public AnalyzerAnonymousInnerClassHelper2(CommonGramsFilterTest outerInstance)
		{
			this.outerInstance = outerInstance;
		}

		protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
		{
			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
			CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
			return new TokenStreamComponents(source, grams);
		}
	}

	/// <summary>
	/// Test that CommonGramsFilter matches common words case-sensitively.
	/// The shared commonWords set is constructed with its final argument set to
	/// false and contains only lower-case entries, so the capitalized "The",
	/// "A" and "B" in the input are not treated as common words: the expected
	/// output contains no "How_The", "like_A", "A_B" or "B_thing?" bigrams.
	/// "The_s" is still expected because the lower-case "s" that follows "The"
	/// is a common word.
	/// </summary>
	[Test]
	public virtual void TestCaseSensitive()
	{
		const string input = "How The s a brown s cow d like A B thing?";
		MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
		TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);

		// Bigrams appear only around the lower-case common words "s", "a" and "d".
		string[] expected =
		{
			"How", "The", "The_s", "s", "s_a", "a", "a_brown", "brown", "brown_s", "s",
			"s_cow", "cow", "cow_d", "d", "d_like", "like", "A", "B", "thing?"
		};
		AssertTokenStreamContents(cgf, expected);
	}

	/// <summary>
	/// CommonGramsQueryFilter when the last word is a stop word:
	/// "dog the" must yield the single token "dog_the".
	/// </summary>
	[Test]
	public virtual void TestLastWordisStopWord()
	{
		var source = new MockTokenizer(new StringReader("dog the"), MockTokenizer.WHITESPACE, false);
		var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
		TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

		AssertTokenStreamContents(queryGrams, new[] { "dog_the" });
	}

	/// <summary>
	/// CommonGramsQueryFilter when the first word is a stop word:
	/// "the dog" must yield the single token "the_dog".
	/// </summary>
	[Test]
	public virtual void TestFirstWordisStopWord()
	{
		var source = new MockTokenizer(new StringReader("the dog"), MockTokenizer.WHITESPACE, false);
		var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
		TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

		AssertTokenStreamContents(queryGrams, new[] { "the_dog" });
	}

	/// <summary>
	/// CommonGramsQueryFilter on a query consisting of a single stop word:
	/// "the" must come through unchanged as "the".
	/// </summary>
	[Test]
	public virtual void TestOneWordQueryStopWord()
	{
		var source = new MockTokenizer(new StringReader("the"), MockTokenizer.WHITESPACE, false);
		var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
		TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

		AssertTokenStreamContents(queryGrams, new[] { "the" });
	}

	/// <summary>
	/// CommonGramsQueryFilter on a query consisting of a single word that is
	/// not a stop word: "monster" must come through unchanged as "monster".
	/// </summary>
	[Test]
	public virtual void TestOneWordQuery()
	{
		var source = new MockTokenizer(new StringReader("monster"), MockTokenizer.WHITESPACE, false);
		var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
		TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

		AssertTokenStreamContents(queryGrams, new[] { "monster" });
	}

	/// <summary>
	/// CommonGramsQueryFilter when both the first and the last word are stop
	/// words: "the of" must yield the single token "the_of".
	/// </summary>
	[Test]
	public virtual void TestFirstAndLastStopWord()
	{
		var source = new MockTokenizer(new StringReader("the of"), MockTokenizer.WHITESPACE, false);
		var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
		TokenFilter queryGrams = new CommonGramsQueryFilter(grams);

		AssertTokenStreamContents(queryGrams, new[] { "the_of" });
	}

	/// <summary>
	/// Blasts random strings through two analyzers in turn: first the
	/// CommonGramsFilter chain, then the chain that additionally wraps it in a
	/// CommonGramsQueryFilter.
	/// </summary>
	[Test]
	public virtual void TestRandomStrings()
	{
		// Tokenizer followed by CommonGramsFilter only.
		Analyzer gramsAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
		CheckRandomData(Random, gramsAnalyzer, 1000 * RANDOM_MULTIPLIER);

		// Same chain with CommonGramsQueryFilter on top.
		Analyzer queryGramsAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this);
		CheckRandomData(Random, queryGramsAnalyzer, 1000 * RANDOM_MULTIPLIER);
	}

	/// <summary>
	/// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
	/// CommonGramsFilter built with the shared commonWords set.
	/// </summary>
	private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
	{
		// Reference to the enclosing test instance; stored but not read in this class.
		private readonly CommonGramsFilterTest outerInstance;

		public AnalyzerAnonymousInnerClassHelper3(CommonGramsFilterTest outerInstance)
		{
			this.outerInstance = outerInstance;
		}

		protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
		{
			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
			return new TokenStreamComponents(source, new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords));
		}
	}

	/// <summary>
	/// Analyzer whose components are a MockTokenizer in WHITESPACE mode feeding a
	/// CommonGramsFilter (built with the shared commonWords set), which is in
	/// turn wrapped in a CommonGramsQueryFilter.
	/// </summary>
	private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
	{
		// Reference to the enclosing test instance; stored but not read in this class.
		private readonly CommonGramsFilterTest outerInstance;

		public AnalyzerAnonymousInnerClassHelper4(CommonGramsFilterTest outerInstance)
		{
			this.outerInstance = outerInstance;
		}

		protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
		{
			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
			var grams = new CommonGramsFilter(TEST_VERSION_CURRENT, source, commonWords);
			var queryGrams = new CommonGramsQueryFilter(grams);
			return new TokenStreamComponents(source, queryGrams);
		}
	}
	}
	}