using J2N.Collections.Generic.Extensions;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Reflection;
using System.Runtime.CompilerServices;
using Console = Lucene.Net.Support.SystemConsole;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Codec = Lucene.Net.Codecs.Codec;
// NOTE: this test will fail w/ PreFlexRW codec! (Because
// this test uses full binary term space, but PreFlex cannot
// handle this since it requires the terms are UTF8 bytes).
//
// Also, SimpleText codec will consume very large amounts of
// disk (but should run successfully). Best to run w/
// -Dtests.codec=Standard, and w/ plenty of RAM, e.g.:
//
// ant test -Dtest.slow=true -Dtests.heapsize=8g
//
// java -server -Xmx8g -d64 -cp .:lib/junit-4.10.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore Lucene.Net.Index.Test2BTerms
//
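// For the .NET port, a roughly equivalent invocation (assuming the test
// runs through `dotnet test` with NUnit name filtering) might be:
//
// dotnet test --filter "FullyQualifiedName~Lucene.Net.Index.Test2BTerms"
//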
[SuppressCodecs("SimpleText", "Memory", "Direct")]
[Ignore("SimpleText codec will consume very large amounts of memory.")]
[TestFixture]
public class Test2BTerms : LuceneTestCase
{
private const int TOKEN_LEN = 5;
private static readonly BytesRef Bytes = new BytesRef(TOKEN_LEN);
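// Synthesizes terms directly as raw 5-byte big-endian values of an
// incrementing counter, bypassing char-based analysis entirely. A random
// sample (roughly one per 500,000-1,000,000 tokens) of the generated terms
// is saved so they can be verified against the index later.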
private sealed class MyTokenStream : TokenStream
{
internal readonly int TokensPerDoc;
internal int TokenCount;
public readonly IList<BytesRef> SavedTerms = new List<BytesRef>();
internal int NextSave;
internal long TermCounter;
internal readonly Random Random;
public MyTokenStream(Random random, int tokensPerDoc)
: base(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY))
{
this.TokensPerDoc = tokensPerDoc;
AddAttribute<ITermToBytesRefAttribute>();
Bytes.Length = TOKEN_LEN;
this.Random = random;
NextSave = TestUtil.NextInt32(random, 500000, 1000000);
}
public override bool IncrementToken()
{
ClearAttributes();
if (TokenCount >= TokensPerDoc)
{
return false;
}
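// Encode the low 40 bits of TermCounter into the shared term bytes,
// most-significant byte first, so terms sort in counter order.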
int shift = 32;
for (int i = 0; i < TOKEN_LEN; i++)
{
Bytes.Bytes[i] = unchecked((byte)((TermCounter >> shift) & 0xFF));
shift -= 8;
}
TermCounter++;
TokenCount++;
if (--NextSave == 0)
{
SavedTerms.Add(BytesRef.DeepCopyOf(Bytes));
Console.WriteLine("TEST: save term=" + Bytes);
NextSave = TestUtil.NextInt32(Random, 500000, 1000000);
}
return true;
}
public override void Reset()
{
TokenCount = 0;
}
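// Minimal ITermToBytesRefAttribute implementation exposing the single shared
// static BytesRef; FillBytesRef is a no-op because IncrementToken writes the
// bytes in place.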
private sealed class MyTermAttributeImpl : Util.Attribute, ITermToBytesRefAttribute
{
public void FillBytesRef()
{
// no-op: the bytes were already filled by our owner's IncrementToken
}
public BytesRef BytesRef
{
get
{
return Bytes;
}
}
public override void Clear()
{
}
public override bool Equals(object other)
{
return other == this;
}
public override int GetHashCode()
{
return RuntimeHelpers.GetHashCode(this);
}
public override void CopyTo(IAttribute target)
{
}
public override object Clone()
{
throw new System.NotSupportedException();
}
}
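// Attribute factory that substitutes MyTermAttributeImpl for
// ITermToBytesRefAttribute and rejects CharTermAttribute requests, so the
// indexing chain only ever sees the raw binary terms.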
private sealed class MyAttributeFactory : AttributeFactory
{
internal readonly AttributeFactory @delegate;
public MyAttributeFactory(AttributeFactory @delegate)
{
this.@delegate = @delegate;
}
public override Util.Attribute CreateAttributeInstance<T>()
{
var attClass = typeof(T);
if (attClass == typeof(ITermToBytesRefAttribute))
{
return new MyTermAttributeImpl();
}
if (attClass.GetTypeInfo().IsSubclassOf(typeof(CharTermAttribute)))
{
throw new System.ArgumentException("CharTermAttribute is not supported by this stream");
}
return @delegate.CreateAttributeInstance<T>();
}
}
}
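// Indexes slightly more than int.MaxValue unique terms, then verifies that a
// sample of them can be found via TermQuery and TermsEnum, and that
// CheckIndex reports a term count greater than int.MaxValue.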
[Ignore("Very slow. Enable manually by removing Ignore.")]
[Test]
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")]Func<IConcurrentMergeScheduler> newScheduler)
{
if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal))
{
throw new Exception("this test cannot run with PreFlex codec");
}
Console.WriteLine("Starting Test2B");
long TERM_COUNT = ((long)int.MaxValue) + 100000000;
int TERMS_PER_DOC = TestUtil.NextInt32(Random, 100000, 1000000);
IList<BytesRef> savedTerms = null;
BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms"));
//MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex"));
if (dir is MockDirectoryWrapper)
{
((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
}
dir.CheckIndexOnDispose = false; // don't double-checkindex
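// Block retained from the Java original; presumably set to false to skip
// indexing and reuse an existing index (see the commented-out directory above).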
if (true)
{
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
.SetRAMBufferSizeMB(256.0)
.SetMergeScheduler(newScheduler())
.SetMergePolicy(NewLogMergePolicy(false, 10))
.SetOpenMode(OpenMode.CREATE));
MergePolicy mp = w.Config.MergePolicy;
if (mp is LogByteSizeMergePolicy)
{
// 1 petabyte:
((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
}
Documents.Document doc = new Documents.Document();
MyTokenStream ts = new MyTokenStream(Random, TERMS_PER_DOC);
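// Index doc IDs only, with norms omitted, to keep the index as small as possible.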
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.IndexOptions = IndexOptions.DOCS_ONLY;
customType.OmitNorms = true;
Field field = new Field("field", ts, customType);
doc.Add(field);
//w.setInfoStream(System.out);
int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC);
Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC);
Console.WriteLine("numDocs=" + numDocs);
for (int i = 0; i < numDocs; i++)
{
long t0 = Environment.TickCount;
w.AddDocument(doc);
Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec");
}
savedTerms = ts.SavedTerms;
Console.WriteLine("TEST: full merge");
w.ForceMerge(1);
Console.WriteLine("TEST: close writer");
w.Dispose();
}
Console.WriteLine("TEST: open reader");
IndexReader r = DirectoryReader.Open(dir);
if (savedTerms == null)
{
savedTerms = FindTerms(r);
}
int numSavedTerms = savedTerms.Count;
IList<BytesRef> bigOrdTerms = new List<BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms));
Console.WriteLine("TEST: test big ord terms...");
TestSavedTerms(r, bigOrdTerms);
Console.WriteLine("TEST: test all saved terms...");
TestSavedTerms(r, savedTerms);
r.Dispose();
Console.WriteLine("TEST: now CheckIndex...");
CheckIndex.Status status = TestUtil.CheckIndex(dir);
long tc = status.SegmentInfos[0].TermIndexStatus.TermCount;
Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue);
dir.Dispose();
Console.WriteLine("TEST: done!");
}
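// Walks every term in the index, saving roughly one in every
// 500,000-1,000,000; used when the index was built by a previous run and
// MyTokenStream's saved terms are unavailable.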
private IList<BytesRef> FindTerms(IndexReader r)
{
Console.WriteLine("TEST: findTerms");
TermsEnum termsEnum = MultiFields.GetTerms(r, "field").GetIterator(null);
IList<BytesRef> savedTerms = new List<BytesRef>();
int nextSave = TestUtil.NextInt32(Random, 500000, 1000000);
BytesRef term;
while ((term = termsEnum.Next()) != null)
{
if (--nextSave == 0)
{
savedTerms.Add(BytesRef.DeepCopyOf(term));
Console.WriteLine("TEST: add " + term);
nextSave = TestUtil.NextInt32(Random, 500000, 1000000);
}
}
return savedTerms;
}
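// For each saved term, verifies that a TermQuery matches at least one
// document and that TermsEnum.SeekCeil lands exactly on the term.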
private void TestSavedTerms(IndexReader r, IList<BytesRef> terms)
{
Console.WriteLine("TEST: run " + terms.Count + " terms on reader=" + r);
IndexSearcher s = NewSearcher(r);
terms.Shuffle();
TermsEnum termsEnum = MultiFields.GetTerms(r, "field").GetIterator(null);
bool failed = false;
for (int iter = 0; iter < 10 * terms.Count; iter++)
{
BytesRef term = terms[Random.Next(terms.Count)];
Console.WriteLine("TEST: search " + term);
long t0 = Environment.TickCount;
int count = s.Search(new TermQuery(new Term("field", term)), 1).TotalHits;
if (count <= 0)
{
Console.WriteLine(" FAILED: count=" + count);
failed = true;
}
long t1 = Environment.TickCount;
Console.WriteLine(" took " + (t1 - t0) + " millis");
TermsEnum.SeekStatus result = termsEnum.SeekCeil(term);
if (result != TermsEnum.SeekStatus.FOUND)
{
if (result == TermsEnum.SeekStatus.END)
{
Console.WriteLine(" FAILED: got END");
}
else
{
Console.WriteLine(" FAILED: wrong term: got " + termsEnum.Term);
}
failed = true;
}
}
Assert.IsFalse(failed);
}
}
}