src/Lucene.Net.Tests/Index/TestDuelingCodecs.cs - lucenenet - Git at Google

 using J2N.Text;
 using Lucene.Net.Documents;
 using Lucene.Net.Index.Extensions;
 using NUnit.Framework;
 using System;
 using System.Text.RegularExpressions;

 namespace Lucene.Net.Index
 {
     using BytesRef = Lucene.Net.Util.BytesRef;
     using Codec = Lucene.Net.Codecs.Codec;
     using Directory = Lucene.Net.Store.Directory;
     using Document = Documents.Document;
     using LineFileDocs = Lucene.Net.Util.LineFileDocs;
     using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

     /*
          * Licensed to the Apache Software Foundation (ASF) under one or more
          * contributor license agreements.  See the NOTICE file distributed with
          * this work for additional information regarding copyright ownership.
          * The ASF licenses this file to You under the Apache License, Version 2.0
          * (the "License"); you may not use this file except in compliance with
          * the License.  You may obtain a copy of the License at
          *
          *     http://www.apache.org/licenses/LICENSE-2.0
          *
          * Unless required by applicable law or agreed to in writing, software
          * distributed under the License is distributed on an "AS IS" BASIS,
          * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
          * See the License for the specific language governing permissions and
          * limitations under the License.
          */

     using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
     using NumericDocValuesField = NumericDocValuesField;
     using SortedSetDocValuesField = SortedSetDocValuesField;
     using TestUtil = Lucene.Net.Util.TestUtil;

     /// <summary>
     /// Compares one codec against another: the same seeded random documents are
     /// indexed once with the "SimpleText" codec and once with a RandomCodec, and
     /// the two resulting readers are asserted to be equivalent.
     /// </summary>
     [TestFixture]
     public class TestDuelingCodecs : LuceneTestCase
     {
         // LUCENENET: compiled once for the whole fixture so that CreateRandomIndex
         // does not pay the regex compilation cost on every call.
         private static readonly Regex WhitespaceRegex = new Regex("\\s+", RegexOptions.Compiled);

         private Directory LeftDir;
         private IndexReader LeftReader;
         private Codec LeftCodec;

         private Directory RightDir;
         private IndexReader RightReader;
         private Codec RightCodec;

         private string Info; // for debugging

         /// <summary>
         /// Builds the "left" and "right" indexes from the same seed (one per codec)
         /// and opens a reader on each of them.
         /// </summary>
         [SetUp]
         public override void SetUp()
         {
             base.SetUp();

             // NOTE(review): the statements below draw from the shared test Random;
             // presumably their order matters for reproducing a failing seed, so it
             // is kept exactly as it was.

             // for now it's SimpleText vs Lucene46(random postings format)
             // as this gives the best overall coverage. when we have more
             // codecs we should probably pick 2 from Codec.availableCodecs()

             LeftCodec = Codec.ForName("SimpleText");
             RightCodec = new RandomCodec(Random);

             LeftDir = NewDirectory();
             RightDir = NewDirectory();

             long seed = Random.Next();

             // must use same seed because of random payloads, etc
             int maxTermLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
             MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random((int)seed));
             leftAnalyzer.MaxTokenLength = maxTermLength;
             MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random((int)seed));
             rightAnalyzer.MaxTokenLength = maxTermLength;

             // but these can be different
             // TODO: this turns this into a really big test of Multi*, is that what we want?
             IndexWriterConfig leftConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
             leftConfig.SetCodec(LeftCodec);
             // preserve docids
             leftConfig.SetMergePolicy(NewLogMergePolicy());

             IndexWriterConfig rightConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer);
             rightConfig.SetCodec(RightCodec);
             // preserve docids
             rightConfig.SetMergePolicy(NewLogMergePolicy());

             // must use same seed because of random docvalues fields, etc
             RandomIndexWriter leftWriter = new RandomIndexWriter(new Random((int)seed), LeftDir, leftConfig);
             RandomIndexWriter rightWriter = new RandomIndexWriter(new Random((int)seed), RightDir, rightConfig);

             int numdocs = AtLeast(100);
             CreateRandomIndex(numdocs, leftWriter, seed);
             CreateRandomIndex(numdocs, rightWriter, seed);

             LeftReader = MaybeWrapReader(leftWriter.GetReader());
             leftWriter.Dispose();
             RightReader = MaybeWrapReader(rightWriter.GetReader());
             rightWriter.Dispose();

             // check that our readers are valid
             TestUtil.CheckReader(LeftReader);
             TestUtil.CheckReader(RightReader);

             Info = "left: " + LeftCodec.ToString() + " / right: " + RightCodec.ToString();
         }

         /// <summary>
         /// Disposes both readers and both directories, then runs the base teardown.
         /// Every resource gets a disposal attempt and the base teardown always runs,
         /// even when one of the <c>Dispose()</c> calls throws.
         /// </summary>
         [TearDown]
         public override void TearDown()
         {
             try
             {
                 // readers first, then the directories they were opened on
                 DisposeAll(LeftReader, RightReader, LeftDir, RightDir);
             }
             finally
             {
                 // drop the references so a later TearDown cannot dispose them again
                 LeftReader = null;
                 RightReader = null;
                 LeftDir = null;
                 RightDir = null;

                 base.TearDown();
             }
         }

         /// <summary>
         /// Disposes every non-null resource in the given order. A failure does not stop
         /// the remaining disposals; the first exception encountered is rethrown afterwards.
         /// </summary>
         /// <param name="resources">Resources to dispose; <c>null</c> entries are skipped.</param>
         private static void DisposeAll(params IDisposable[] resources)
         {
             Exception firstFailure = null;
             foreach (IDisposable resource in resources)
             {
                 if (resource == null)
                 {
                     continue;
                 }
                 try
                 {
                     resource.Dispose();
                 }
                 catch (Exception e)
                 {
                     if (firstFailure == null)
                     {
                         firstFailure = e;
                     }
                 }
             }

             if (firstFailure != null)
             {
                 // rethrow while keeping the original stack trace
                 System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(firstFailure).Throw();
             }
         }

         /// <summary>
         /// Populates a writer with random stuff. This must be fully reproducible with the seed!
         /// </summary>
         /// <param name="numdocs">Number of documents to add to <paramref name="writer"/>.</param>
         /// <param name="writer">Writer that receives the generated documents.</param>
         /// <param name="seed">Seed for the document source and the sparse numeric field;
         /// it is narrowed to <see cref="int"/> before use.</param>
         public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed)
         {
             Random random = new Random((int)seed);
             // primary source for our data is from linefiledocs, it's realistic.
             LineFileDocs lineFileDocs = new LineFileDocs(random);
             try
             {
                 // TODO: we should add other fields that use things like docs&freqs but omit positions,
                 // because linefiledocs doesn't cover all the possibilities.
                 for (int i = 0; i < numdocs; i++)
                 {
                     Document document = lineFileDocs.NextDoc();
                     // grab the title and add some SortedSet instances for fun
                     string title = document.Get("titleTokenized");
                     string[] split = WhitespaceRegex.Split(title).TrimEnd();
                     foreach (string trash in split)
                     {
                         document.Add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
                     }
                     // add a numeric dv field sometimes
                     document.RemoveFields("sparsenumeric");
                     if (random.Next(4) == 2)
                     {
                         document.Add(new NumericDocValuesField("sparsenumeric", random.Next()));
                     }
                     writer.AddDocument(document);
                 }
             }
             finally
             {
                 // release the line file source even when indexing a document fails
                 lineFileDocs.Dispose();
             }
         }

         /// <summary>
         /// Checks that the two indexes are equivalent.
         /// </summary>
         [Test]
         public virtual void TestEquals()
         {
             AssertReaderEquals(Info, LeftReader, RightReader);
         }
     }
 }
	using J2N.Text;
	using Lucene.Net.Documents;
	using Lucene.Net.Index.Extensions;
	using NUnit.Framework;
	using System;
	using System.Text.RegularExpressions;

	namespace Lucene.Net.Index
	{
	using BytesRef = Lucene.Net.Util.BytesRef;
	using Codec = Lucene.Net.Codecs.Codec;
	using Directory = Lucene.Net.Store.Directory;
	using Document = Documents.Document;
	using LineFileDocs = Lucene.Net.Util.LineFileDocs;
	using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
	using NumericDocValuesField = NumericDocValuesField;
	using SortedSetDocValuesField = SortedSetDocValuesField;
	using TestUtil = Lucene.Net.Util.TestUtil;

	/// <summary>
	/// Compares one codec against another: the same seeded random documents are
	/// indexed once with the "SimpleText" codec and once with a RandomCodec, and
	/// the two resulting readers are asserted to be equivalent.
	/// </summary>
	[TestFixture]
	public class TestDuelingCodecs : LuceneTestCase
	{
		// LUCENENET: compiled once for the whole fixture so that CreateRandomIndex
		// does not pay the regex compilation cost on every call.
		private static readonly Regex WhitespaceRegex = new Regex("\\s+", RegexOptions.Compiled);

		private Directory LeftDir;
		private IndexReader LeftReader;
		private Codec LeftCodec;

		private Directory RightDir;
		private IndexReader RightReader;
		private Codec RightCodec;

		private string Info; // for debugging

		/// <summary>
		/// Builds the "left" and "right" indexes from the same seed (one per codec)
		/// and opens a reader on each of them.
		/// </summary>
		[SetUp]
		public override void SetUp()
		{
			base.SetUp();

			// NOTE(review): the statements below draw from the shared test Random;
			// presumably their order matters for reproducing a failing seed, so it
			// is kept exactly as it was.

			// for now it's SimpleText vs Lucene46(random postings format)
			// as this gives the best overall coverage. when we have more
			// codecs we should probably pick 2 from Codec.availableCodecs()

			LeftCodec = Codec.ForName("SimpleText");
			RightCodec = new RandomCodec(Random);

			LeftDir = NewDirectory();
			RightDir = NewDirectory();

			long seed = Random.Next();

			// must use same seed because of random payloads, etc
			int maxTermLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
			MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random((int)seed));
			leftAnalyzer.MaxTokenLength = maxTermLength;
			MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random((int)seed));
			rightAnalyzer.MaxTokenLength = maxTermLength;

			// but these can be different
			// TODO: this turns this into a really big test of Multi*, is that what we want?
			IndexWriterConfig leftConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
			leftConfig.SetCodec(LeftCodec);
			// preserve docids
			leftConfig.SetMergePolicy(NewLogMergePolicy());

			IndexWriterConfig rightConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, rightAnalyzer);
			rightConfig.SetCodec(RightCodec);
			// preserve docids
			rightConfig.SetMergePolicy(NewLogMergePolicy());

			// must use same seed because of random docvalues fields, etc
			RandomIndexWriter leftWriter = new RandomIndexWriter(new Random((int)seed), LeftDir, leftConfig);
			RandomIndexWriter rightWriter = new RandomIndexWriter(new Random((int)seed), RightDir, rightConfig);

			int numdocs = AtLeast(100);
			CreateRandomIndex(numdocs, leftWriter, seed);
			CreateRandomIndex(numdocs, rightWriter, seed);

			LeftReader = MaybeWrapReader(leftWriter.GetReader());
			leftWriter.Dispose();
			RightReader = MaybeWrapReader(rightWriter.GetReader());
			rightWriter.Dispose();

			// check that our readers are valid
			TestUtil.CheckReader(LeftReader);
			TestUtil.CheckReader(RightReader);

			Info = "left: " + LeftCodec.ToString() + " / right: " + RightCodec.ToString();
		}

		/// <summary>
		/// Disposes both readers and both directories, then runs the base teardown.
		/// Every resource gets a disposal attempt and the base teardown always runs,
		/// even when one of the <c>Dispose()</c> calls throws.
		/// </summary>
		[TearDown]
		public override void TearDown()
		{
			try
			{
				// readers first, then the directories they were opened on
				DisposeAll(LeftReader, RightReader, LeftDir, RightDir);
			}
			finally
			{
				// drop the references so a later TearDown cannot dispose them again
				LeftReader = null;
				RightReader = null;
				LeftDir = null;
				RightDir = null;

				base.TearDown();
			}
		}

		/// <summary>
		/// Disposes every non-null resource in the given order. A failure does not stop
		/// the remaining disposals; the first exception encountered is rethrown afterwards.
		/// </summary>
		/// <param name="resources">Resources to dispose; <c>null</c> entries are skipped.</param>
		private static void DisposeAll(params IDisposable[] resources)
		{
			Exception firstFailure = null;
			foreach (IDisposable resource in resources)
			{
				if (resource == null)
				{
					continue;
				}
				try
				{
					resource.Dispose();
				}
				catch (Exception e)
				{
					if (firstFailure == null)
					{
						firstFailure = e;
					}
				}
			}

			if (firstFailure != null)
			{
				// rethrow while keeping the original stack trace
				System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(firstFailure).Throw();
			}
		}

		/// <summary>
		/// Populates a writer with random stuff. This must be fully reproducible with the seed!
		/// </summary>
		/// <param name="numdocs">Number of documents to add to <paramref name="writer"/>.</param>
		/// <param name="writer">Writer that receives the generated documents.</param>
		/// <param name="seed">Seed for the document source and the sparse numeric field;
		/// it is narrowed to <see cref="int"/> before use.</param>
		public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed)
		{
			Random random = new Random((int)seed);
			// primary source for our data is from linefiledocs, it's realistic.
			LineFileDocs lineFileDocs = new LineFileDocs(random);
			try
			{
				// TODO: we should add other fields that use things like docs&freqs but omit positions,
				// because linefiledocs doesn't cover all the possibilities.
				for (int i = 0; i < numdocs; i++)
				{
					Document document = lineFileDocs.NextDoc();
					// grab the title and add some SortedSet instances for fun
					string title = document.Get("titleTokenized");
					string[] split = WhitespaceRegex.Split(title).TrimEnd();
					foreach (string trash in split)
					{
						document.Add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
					}
					// add a numeric dv field sometimes
					document.RemoveFields("sparsenumeric");
					if (random.Next(4) == 2)
					{
						document.Add(new NumericDocValuesField("sparsenumeric", random.Next()));
					}
					writer.AddDocument(document);
				}
			}
			finally
			{
				// release the line file source even when indexing a document fails
				lineFileDocs.Dispose();
			}
		}

		/// <summary>
		/// Checks that the two indexes are equivalent.
		/// </summary>
		[Test]
		public virtual void TestEquals()
		{
			AssertReaderEquals(Info, LeftReader, RightReader);
		}
	}
	}