src/Lucene.Net.Tests/Index/TestTermsEnum2.cs - lucenenet - Git at Google

 using J2N.Collections.Generic.Extensions;
 using Lucene.Net.Documents;
 using Lucene.Net.Index.Extensions;
 using Lucene.Net.Util.Automaton;
 using NUnit.Framework;
 using System;
 using System.Collections.Generic;
 using Assert = Lucene.Net.TestFramework.Assert;
 using JCG = J2N.Collections.Generic;

 namespace Lucene.Net.Index
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     using AutomatonQuery = Lucene.Net.Search.AutomatonQuery;
     using BytesRef = Lucene.Net.Util.BytesRef;
     using CheckHits = Lucene.Net.Search.CheckHits;
     using Codec = Lucene.Net.Codecs.Codec;
     using Directory = Lucene.Net.Store.Directory;
     using Document = Documents.Document;
     using Field = Field;
     using IndexSearcher = Lucene.Net.Search.IndexSearcher;
     using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
     using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
     using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
     using SeekStatus = Lucene.Net.Index.TermsEnum.SeekStatus;
     using TestUtil = Lucene.Net.Util.TestUtil;

     [TestFixture]
     public class TestTermsEnum2 : LuceneTestCase
     {
         // Per-test fixture state: everything below is (re)assigned in SetUp.
         private Directory dir; // directory holding the test index; disposed in TearDown
         private IndexReader reader; // reader obtained from the writer; disposed in TearDown
         private IndexSearcher searcher; // searcher created over reader
         private JCG.SortedSet<BytesRef> terms; // the terms we put in the index, in sorted order
         private Automaton termsAutomaton; // string-union automaton built from terms
         internal int numIterations; // how many rounds each test loops for

         /// <summary>
         /// Builds a fresh index of random unicode strings before each test and records the
         /// indexed terms, plus their union automaton, for the tests to verify against.
         /// </summary>
         [SetUp]
         public override void SetUp()
         {
             base.SetUp();

             // The random regexps we generate are awful, which is good for testing, but they
             // make the preflex (Lucene3x) codec very slow, so it gets fewer iterations.
             bool isPreflexCodec = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal);
             numIterations = isPreflexCodec ? 10 * RandomMultiplier : AtLeast(50);

             dir = NewDirectory();
             MockAnalyzer keywordAnalyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
             IndexWriterConfig config = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, keywordAnalyzer)
                 .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000));
             RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

             // One document with one stored string field is reused; only the value changes.
             Document doc = new Document();
             Field field = NewStringField("field", "", Field.Store.YES);
             doc.Add(field);
             terms = new JCG.SortedSet<BytesRef>();

             for (int remaining = AtLeast(200); remaining > 0; remaining--)
             {
                 string value = TestUtil.RandomUnicodeString(Random);
                 field.SetStringValue(value);
                 terms.Add(new BytesRef(value));
                 writer.AddDocument(doc);
             }

             termsAutomaton = BasicAutomata.MakeStringUnion(terms);

             // Open the reader and searcher from the writer before the writer is disposed.
             reader = writer.GetReader();
             searcher = NewSearcher(reader);
             writer.Dispose();
         }

         /// <summary>
         /// Releases the index reader and directory created for the test, then runs the
         /// base class teardown.
         /// </summary>
         /// <remarks>
         /// The steps are chained through <c>finally</c> blocks so that an exception thrown
         /// while disposing the reader cannot skip disposing the directory or the base
         /// teardown. The first exception still propagates unless a later step also throws.
         /// </remarks>
         [TearDown]
         public override void TearDown()
         {
             try
             {
                 reader.Dispose();
             }
             finally
             {
                 try
                 {
                     dir.Dispose();
                 }
                 finally
                 {
                     base.TearDown();
                 }
             }
         }

         /// <summary>
         /// Tests a pre-intersected automaton against the original: a query built from a
         /// random regexp must produce the same hits as a query built from the union of the
         /// indexed terms that the regexp accepts.
         /// </summary>
         [Test]
         public virtual void TestFiniteVersusInfinite()
         {
             for (int iter = 0; iter < numIterations; iter++)
             {
                 string pattern = AutomatonTestUtil.RandomRegexp(Random);
                 Automaton original = new RegExp(pattern, RegExpSyntax.NONE).ToAutomaton();

                 // Gather every indexed term the automaton accepts (terms iterates in sorted order).
                 IList<BytesRef> accepted = new List<BytesRef>();
                 foreach (BytesRef candidate in terms)
                 {
                     if (!BasicOperations.Run(original, candidate.Utf8ToString()))
                     {
                         continue;
                     }

                     accepted.Add(candidate);
                 }

                 Automaton preIntersected = BasicAutomata.MakeStringUnion(accepted);

                 AutomatonQuery originalQuery = new AutomatonQuery(new Term("field", ""), original);
                 AutomatonQuery preIntersectedQuery = new AutomatonQuery(new Term("field", ""), preIntersected);

                 var originalHits = searcher.Search(originalQuery, 25).ScoreDocs;
                 var preIntersectedHits = searcher.Search(preIntersectedQuery, 25).ScoreDocs;
                 CheckHits.CheckEqual(originalQuery, originalHits, preIntersectedHits);
             }
         }

         /// <summary>
         /// Seeks to every indexed term accepted by a random automaton, visiting the terms
         /// in shuffled order and randomly choosing between exact and ceiling seeks.
         /// </summary>
         [Test]
         public virtual void TestSeeking()
         {
             for (int iter = 0; iter < numIterations; iter++)
             {
                 string pattern = AutomatonTestUtil.RandomRegexp(Random);
                 Automaton acceptor = new RegExp(pattern, RegExpSyntax.NONE).ToAutomaton();
                 TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetEnumerator();

                 IList<BytesRef> shuffled = new List<BytesRef>(terms);
                 shuffled.Shuffle(Random);

                 foreach (BytesRef target in shuffled)
                 {
                     if (!BasicOperations.Run(acceptor, target.Utf8ToString()))
                     {
                         // Not accepted by the automaton: nothing to seek to.
                         continue;
                     }

                     bool useSeekExact = Random.NextBoolean();
                     if (useSeekExact)
                     {
                         Assert.IsTrue(termsEnum.SeekExact(target));
                         continue;
                     }

                     // Ceiling seek must land exactly on the target, since it is in the index.
                     Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(target));
                     Assert.AreEqual(target, termsEnum.Term);
                 }
             }
         }

         /// <summary>
         /// Walks all indexed terms in sorted order, reaching each one by a randomly chosen
         /// mix of next, ceiling seek and exact seek.
         /// </summary>
         [Test]
         public virtual void TestSeekingAndNexting()
         {
             for (int iter = 0; iter < numIterations; iter++)
             {
                 TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetEnumerator();

                 foreach (BytesRef expected in terms)
                 {
                     switch (Random.Next(3))
                     {
                         case 0:
                             // Step forward: the next term must be the expected one.
                             Assert.IsTrue(termsEnum.MoveNext());
                             Assert.AreEqual(expected, termsEnum.Term);
                             break;

                         case 1:
                             // Ceiling seek must find the expected term itself.
                             Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(expected));
                             Assert.AreEqual(expected, termsEnum.Term);
                             break;

                         default:
                             Assert.IsTrue(termsEnum.SeekExact(expected));
                             break;
                     }
                 }
             }
         }

         /// <summary>
         /// Tests intersect: the terms enumerated by intersecting the field's terms with a
         /// random automaton must form the same language as the automaton intersection of
         /// the indexed terms with that automaton. TODO start at a random term!
         /// </summary>
         [Test]
         public virtual void TestIntersect()
         {
             for (int iter = 0; iter < numIterations; iter++)
             {
                 string pattern = AutomatonTestUtil.RandomRegexp(Random);
                 Automaton acceptor = new RegExp(pattern, RegExpSyntax.NONE).ToAutomaton();
                 bool isFinite = SpecialOperations.IsFinite(acceptor);
                 CompiledAutomaton compiled = new CompiledAutomaton(acceptor, isFinite, false);
                 TermsEnum intersected = MultiFields.GetTerms(reader, "field").Intersect(compiled, null);
                 Automaton expected = BasicOperations.Intersection(termsAutomaton, acceptor);

                 JCG.SortedSet<BytesRef> seen = new JCG.SortedSet<BytesRef>();
                 while (intersected.MoveNext())
                 {
                     // Store a deep copy of the current term, not the enum's own instance.
                     seen.Add(BytesRef.DeepCopyOf(intersected.Term));
                 }

                 Automaton actual = BasicAutomata.MakeStringUnion(seen);
                 Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
             }
         }
     }
 }
	using J2N.Collections.Generic.Extensions;
	using Lucene.Net.Documents;
	using Lucene.Net.Index.Extensions;
	using Lucene.Net.Util.Automaton;
	using NUnit.Framework;
	using System;
	using System.Collections.Generic;
	using Assert = Lucene.Net.TestFramework.Assert;
	using JCG = J2N.Collections.Generic;

	namespace Lucene.Net.Index
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	using AutomatonQuery = Lucene.Net.Search.AutomatonQuery;
	using BytesRef = Lucene.Net.Util.BytesRef;
	using CheckHits = Lucene.Net.Search.CheckHits;
	using Codec = Lucene.Net.Codecs.Codec;
	using Directory = Lucene.Net.Store.Directory;
	using Document = Documents.Document;
	using Field = Field;
	using IndexSearcher = Lucene.Net.Search.IndexSearcher;
	using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
	using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
	using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
	using SeekStatus = Lucene.Net.Index.TermsEnum.SeekStatus;
	using TestUtil = Lucene.Net.Util.TestUtil;

	[TestFixture]
	public class TestTermsEnum2 : LuceneTestCase
	{
	// Per-test fixture state: everything below is (re)assigned in SetUp.
	private Directory dir; // directory holding the test index; disposed in TearDown
	private IndexReader reader; // reader obtained from the writer; disposed in TearDown
	private IndexSearcher searcher; // searcher created over reader
	private JCG.SortedSet<BytesRef> terms; // the terms we put in the index, in sorted order
	private Automaton termsAutomaton; // string-union automaton built from terms
	internal int numIterations; // how many rounds each test loops for

	/// <summary>
	/// Builds a fresh index of random unicode strings before each test and records the
	/// indexed terms, plus their union automaton, for the tests to verify against.
	/// </summary>
	[SetUp]
	public override void SetUp()
	{
		base.SetUp();

		// The random regexps we generate are awful, which is good for testing, but they
		// make the preflex (Lucene3x) codec very slow, so it gets fewer iterations.
		bool isPreflexCodec = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal);
		numIterations = isPreflexCodec ? 10 * RandomMultiplier : AtLeast(50);

		dir = NewDirectory();
		MockAnalyzer keywordAnalyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
		IndexWriterConfig config = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, keywordAnalyzer)
			.SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000));
		RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

		// One document with one stored string field is reused; only the value changes.
		Document doc = new Document();
		Field field = NewStringField("field", "", Field.Store.YES);
		doc.Add(field);
		terms = new JCG.SortedSet<BytesRef>();

		for (int remaining = AtLeast(200); remaining > 0; remaining--)
		{
			string value = TestUtil.RandomUnicodeString(Random);
			field.SetStringValue(value);
			terms.Add(new BytesRef(value));
			writer.AddDocument(doc);
		}

		termsAutomaton = BasicAutomata.MakeStringUnion(terms);

		// Open the reader and searcher from the writer before the writer is disposed.
		reader = writer.GetReader();
		searcher = NewSearcher(reader);
		writer.Dispose();
	}

	/// <summary>
	/// Releases the index reader and directory created for the test, then runs the
	/// base class teardown.
	/// </summary>
	/// <remarks>
	/// The steps are chained through <c>finally</c> blocks so that an exception thrown
	/// while disposing the reader cannot skip disposing the directory or the base
	/// teardown. The first exception still propagates unless a later step also throws.
	/// </remarks>
	[TearDown]
	public override void TearDown()
	{
		try
		{
			reader.Dispose();
		}
		finally
		{
			try
			{
				dir.Dispose();
			}
			finally
			{
				base.TearDown();
			}
		}
	}

	/// <summary>
	/// Tests a pre-intersected automaton against the original: a query built from a
	/// random regexp must produce the same hits as a query built from the union of the
	/// indexed terms that the regexp accepts.
	/// </summary>
	[Test]
	public virtual void TestFiniteVersusInfinite()
	{
		for (int iter = 0; iter < numIterations; iter++)
		{
			string pattern = AutomatonTestUtil.RandomRegexp(Random);
			Automaton original = new RegExp(pattern, RegExpSyntax.NONE).ToAutomaton();

			// Gather every indexed term the automaton accepts (terms iterates in sorted order).
			IList<BytesRef> accepted = new List<BytesRef>();
			foreach (BytesRef candidate in terms)
			{
				if (!BasicOperations.Run(original, candidate.Utf8ToString()))
				{
					continue;
				}

				accepted.Add(candidate);
			}

			Automaton preIntersected = BasicAutomata.MakeStringUnion(accepted);

			AutomatonQuery originalQuery = new AutomatonQuery(new Term("field", ""), original);
			AutomatonQuery preIntersectedQuery = new AutomatonQuery(new Term("field", ""), preIntersected);

			var originalHits = searcher.Search(originalQuery, 25).ScoreDocs;
			var preIntersectedHits = searcher.Search(preIntersectedQuery, 25).ScoreDocs;
			CheckHits.CheckEqual(originalQuery, originalHits, preIntersectedHits);
		}
	}

	/// <summary>
	/// Seeks to every indexed term accepted by a random automaton, visiting the terms
	/// in shuffled order and randomly choosing between exact and ceiling seeks.
	/// </summary>
	[Test]
	public virtual void TestSeeking()
	{
		for (int iter = 0; iter < numIterations; iter++)
		{
			string pattern = AutomatonTestUtil.RandomRegexp(Random);
			Automaton acceptor = new RegExp(pattern, RegExpSyntax.NONE).ToAutomaton();
			TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetEnumerator();

			IList<BytesRef> shuffled = new List<BytesRef>(terms);
			shuffled.Shuffle(Random);

			foreach (BytesRef target in shuffled)
			{
				if (!BasicOperations.Run(acceptor, target.Utf8ToString()))
				{
					// Not accepted by the automaton: nothing to seek to.
					continue;
				}

				bool useSeekExact = Random.NextBoolean();
				if (useSeekExact)
				{
					Assert.IsTrue(termsEnum.SeekExact(target));
					continue;
				}

				// Ceiling seek must land exactly on the target, since it is in the index.
				Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(target));
				Assert.AreEqual(target, termsEnum.Term);
			}
		}
	}

	/// <summary>
	/// Walks all indexed terms in sorted order, reaching each one by a randomly chosen
	/// mix of next, ceiling seek and exact seek.
	/// </summary>
	[Test]
	public virtual void TestSeekingAndNexting()
	{
		for (int iter = 0; iter < numIterations; iter++)
		{
			TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetEnumerator();

			foreach (BytesRef expected in terms)
			{
				switch (Random.Next(3))
				{
					case 0:
						// Step forward: the next term must be the expected one.
						Assert.IsTrue(termsEnum.MoveNext());
						Assert.AreEqual(expected, termsEnum.Term);
						break;

					case 1:
						// Ceiling seek must find the expected term itself.
						Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(expected));
						Assert.AreEqual(expected, termsEnum.Term);
						break;

					default:
						Assert.IsTrue(termsEnum.SeekExact(expected));
						break;
				}
			}
		}
	}

	/// <summary>
	/// Tests intersect: the terms enumerated by intersecting the field's terms with a
	/// random automaton must form the same language as the automaton intersection of
	/// the indexed terms with that automaton. TODO start at a random term!
	/// </summary>
	[Test]
	public virtual void TestIntersect()
	{
		for (int iter = 0; iter < numIterations; iter++)
		{
			string pattern = AutomatonTestUtil.RandomRegexp(Random);
			Automaton acceptor = new RegExp(pattern, RegExpSyntax.NONE).ToAutomaton();
			bool isFinite = SpecialOperations.IsFinite(acceptor);
			CompiledAutomaton compiled = new CompiledAutomaton(acceptor, isFinite, false);
			TermsEnum intersected = MultiFields.GetTerms(reader, "field").Intersect(compiled, null);
			Automaton expected = BasicOperations.Intersection(termsAutomaton, acceptor);

			JCG.SortedSet<BytesRef> seen = new JCG.SortedSet<BytesRef>();
			while (intersected.MoveNext())
			{
				// Store a deep copy of the current term, not the enum's own instance.
				seen.Add(BytesRef.DeepCopyOf(intersected.Term));
			}

			Automaton actual = BasicAutomata.MakeStringUnion(seen);
			Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
		}
	}
	}
	}