using J2N.Collections.Generic.Extensions;
using J2N.Numerics;
using J2N.Text;
using J2N.Threading;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Diagnostics;
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Threading;
using Assert = Lucene.Net.TestFramework.Assert;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Directory = Lucene.Net.Store.Directory;
using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
using Document = Documents.Document;
using Field = Field;
using FieldType = FieldType;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using TermQuery = Lucene.Net.Search.TermQuery;
using TextField = TextField;
[TestFixture]
public class TestStressIndexing2 : LuceneTestCase
{
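// Test knobs; sameFieldOrder, mergeFactor, maxBufferedDocs and seed are re-randomized
// per iteration by TestMultiConfig.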
private static int maxFields = 4;
private static int bigFieldSize = 10;
private static bool sameFieldOrder = false;
private static int mergeFactor = 3;
private static int maxBufferedDocs = 3;
private static int seed = 0;
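// Test point that randomly yields the indexing thread inside IndexWriter, perturbing
// thread scheduling to increase the chance of exposing concurrency bugs.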
public sealed class YieldTestPoint : ITestPoint
{
private readonly TestStressIndexing2 outerInstance;
public YieldTestPoint(TestStressIndexing2 outerInstance)
{
this.outerInstance = outerInstance;
}
public void Apply(string name)
{
// if (name.equals("startCommit")) {
if (Random.Next(4) == 2)
{
Thread.Sleep(0);
}
}
}
[Test]
public virtual void TestRandomIWReader()
{
Directory dir = NewDirectory();
// TODO: verify equals using IW.getReader
DocsAndWriter dw = IndexRandomIWReader(5, 3, 100, dir);
DirectoryReader reader = dw.writer.GetReader();
dw.writer.Commit();
VerifyEquals(Random, reader, dir, "id");
reader.Dispose();
dw.writer.Dispose();
dir.Dispose();
}
[Test]
public virtual void TestRandom()
{
Directory dir1 = NewDirectory();
Directory dir2 = NewDirectory();
// mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
int maxThreadStates = 1 + Random.Next(10);
bool doReaderPooling = Random.NextBoolean();
IDictionary<string, Document> docs = IndexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling);
IndexSerial(Random, docs, dir2);
// self-check (disabled): each index should compare equal to itself
// verifyEquals(dir1, dir1, "id");
// verifyEquals(dir2, dir2, "id");
VerifyEquals(dir1, dir2, "id");
dir1.Dispose();
dir2.Dispose();
}
[Test]
public virtual void TestMultiConfig()
{
// test many combinations of smaller parameters together
int num = AtLeast(3);
for (int i = 0; i < num; i++) // increase iterations for better testing
{
if (Verbose)
{
Console.WriteLine("\n\nTEST: top iter=" + i);
}
sameFieldOrder = Random.NextBoolean();
mergeFactor = Random.Next(3) + 2;
maxBufferedDocs = Random.Next(3) + 2;
int maxThreadStates = 1 + Random.Next(10);
bool doReaderPooling = Random.NextBoolean();
seed++;
int nThreads = Random.Next(5) + 1;
int iter = Random.Next(5) + 1;
int range = Random.Next(20) + 1;
Directory dir1 = NewDirectory();
Directory dir2 = NewDirectory();
if (Verbose)
{
Console.WriteLine(" nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor + " maxBufferedDocs=" + maxBufferedDocs);
}
IDictionary<string, Document> docs = IndexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling);
if (Verbose)
{
Console.WriteLine("TEST: index serial");
}
IndexSerial(Random, docs, dir2);
if (Verbose)
{
Console.WriteLine("TEST: verify");
}
VerifyEquals(dir1, dir2, "id");
dir1.Dispose();
dir2.Dispose();
}
}
internal static Term idTerm = new Term("id", "");
internal IndexingThread[] threads;
internal static IComparer<IIndexableField> fieldNameComparer = Comparer<IIndexableField>.Create((o1, o2) => o1.Name.CompareToOrdinal(o2.Name));
// this test avoids using any extra synchronization in the multiple
// indexing threads to test that IndexWriter does correctly synchronize
// everything.
public class DocsAndWriter
{
internal IDictionary<string, Document> docs;
internal IndexWriter writer;
}
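// Indexes documents from several concurrent threads and returns both the doc map and the
// still-open writer, so the caller can verify against a near-real-time reader. The caller
// is responsible for disposing the returned writer.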
public virtual DocsAndWriter IndexRandomIWReader(int nThreads, int iterations, int range, Directory dir)
{
IDictionary<string, Document> docs = new Dictionary<string, Document>();
IndexWriter w = RandomIndexWriter.MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetRAMBufferSizeMB(0.1).SetMaxBufferedDocs(maxBufferedDocs).SetMergePolicy(NewLogMergePolicy()), new YieldTestPoint(this));
w.Commit();
LogMergePolicy lmp = (LogMergePolicy)w.Config.MergePolicy;
lmp.NoCFSRatio = 0.0;
lmp.MergeFactor = mergeFactor;
/*
w.setMaxMergeDocs(Integer.MAX_VALUE);
w.setMaxFieldLength(10000);
w.SetRAMBufferSizeMB(1);
w.setMergeFactor(10);
*/
threads = new IndexingThread[nThreads];
for (int i = 0; i < threads.Length; i++)
{
IndexingThread th = new IndexingThread(this);
th.w = w;
th.@base = 1000000 * i;
th.range = range;
th.iterations = iterations;
threads[i] = th;
}
for (int i = 0; i < threads.Length; i++)
{
threads[i].Start();
}
for (int i = 0; i < threads.Length; i++)
{
threads[i].Join();
}
// w.ForceMerge(1);
//w.Dispose();
for (int i = 0; i < threads.Length; i++)
{
IndexingThread th = threads[i];
lock (th)
{
docs.PutAll(th.docs);
}
}
TestUtil.CheckIndex(dir);
DocsAndWriter dw = new DocsAndWriter();
dw.docs = docs;
dw.writer = w;
return dw;
}
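// Indexes documents from nThreads concurrent threads, disposes the writer, checks the
// index, and returns a map of id -> last Document written for that id.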
public virtual IDictionary<string, Document> IndexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates, bool doReaderPooling)
{
IDictionary<string, Document> docs = new Dictionary<string, Document>();
IndexWriter w = RandomIndexWriter.MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetRAMBufferSizeMB(0.1).SetMaxBufferedDocs(maxBufferedDocs).SetIndexerThreadPool(new DocumentsWriterPerThreadPool(maxThreadStates)).SetReaderPooling(doReaderPooling).SetMergePolicy(NewLogMergePolicy()), new YieldTestPoint(this));
LogMergePolicy lmp = (LogMergePolicy)w.Config.MergePolicy;
lmp.NoCFSRatio = 0.0;
lmp.MergeFactor = mergeFactor;
threads = new IndexingThread[nThreads];
for (int i = 0; i < threads.Length; i++)
{
IndexingThread th = new IndexingThread(this);
th.w = w;
th.@base = 1000000 * i;
th.range = range;
th.iterations = iterations;
threads[i] = th;
}
for (int i = 0; i < threads.Length; i++)
{
threads[i].Start();
}
for (int i = 0; i < threads.Length; i++)
{
threads[i].Join();
}
//w.ForceMerge(1);
w.Dispose();
for (int i = 0; i < threads.Length; i++)
{
IndexingThread th = threads[i];
lock (th)
{
docs.PutAll(th.docs);
}
}
//System.out.println("TEST: checkindex");
TestUtil.CheckIndex(dir);
return docs;
}
/// <summary>
/// LUCENENET specific: this method is non-static because NewIndexWriterConfig is no longer static.
/// </summary>
public void IndexSerial(Random random, IDictionary<string, Document> docs, Directory dir)
{
IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
// index all docs in a single thread
IEnumerator<Document> iter = docs.Values.GetEnumerator();
while (iter.MoveNext())
{
Document d = iter.Current;
List<IIndexableField> fields = new List<IIndexableField>();
fields.AddRange(d.Fields);
// put fields in the same order each time
fields.Sort(fieldNameComparer);
Document d1 = new Document();
for (int i = 0; i < fields.Count; i++)
{
d1.Add(fields[i]);
}
w.AddDocument(d1);
// System.out.println("indexing "+d1);
}
w.Dispose();
}
public virtual void VerifyEquals(Random r, DirectoryReader r1, Directory dir2, string idField)
{
DirectoryReader r2 = DirectoryReader.Open(dir2);
VerifyEquals(r1, r2, idField);
r2.Dispose();
}
public virtual void VerifyEquals(Directory dir1, Directory dir2, string idField)
{
DirectoryReader r1 = DirectoryReader.Open(dir1);
DirectoryReader r2 = DirectoryReader.Open(dir2);
VerifyEquals(r1, r2, idField);
r1.Dispose();
r2.Dispose();
}
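// Dumps every docID/id pair per segment, marking deleted documents, for debugging.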
private static void PrintDocs(DirectoryReader r)
{
foreach (AtomicReaderContext ctx in r.Leaves)
{
// TODO: improve this
AtomicReader sub = (AtomicReader)ctx.Reader;
IBits liveDocs = sub.LiveDocs;
Console.WriteLine(" " + ((SegmentReader)sub).SegmentInfo);
for (int docID = 0; docID < sub.MaxDoc; docID++)
{
Document doc = sub.Document(docID);
if (liveDocs == null || liveDocs.Get(docID))
{
Console.WriteLine(" docID=" + docID + " id:" + doc.Get("id"));
}
else
{
Console.WriteLine(" DEL docID=" + docID + " id:" + doc.Get("id"));
}
}
}
}
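// Verifies that two indexes contain equivalent documents, stored fields, term vectors
// and postings, using idField to map docIDs between the two readers.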
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
{
if (Verbose)
{
Console.WriteLine("\nr1 docs:");
PrintDocs(r1);
Console.WriteLine("\nr2 docs:");
PrintDocs(r2);
}
if (r1.NumDocs != r2.NumDocs)
{
if (Debugging.AssertsEnabled) Debugging.Assert(false, "r1.NumDocs={0} vs r2.NumDocs={1}", r1.NumDocs, r2.NumDocs);
}
bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);
int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping
// create a mapping from r2's docID space to r1's docID space based on idField
Fields f1 = MultiFields.GetFields(r1);
if (f1 == null)
{
// make sure r2 is empty
Assert.IsNull(MultiFields.GetFields(r2));
return;
}
Terms terms1 = f1.GetTerms(idField);
if (terms1 == null)
{
Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
return;
}
TermsEnum termsEnum = terms1.GetEnumerator();
IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
IBits liveDocs2 = MultiFields.GetLiveDocs(r2);
Fields fields = MultiFields.GetFields(r2);
if (fields == null)
{
// make sure r1 is in fact empty (e.g. contains only deleted docs):
IBits liveDocs = MultiFields.GetLiveDocs(r1);
DocsEnum docs = null;
while (termsEnum.MoveNext())
{
docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
{
Assert.Fail("r1 is not empty but r2 is");
}
}
return;
}
Terms terms2 = fields.GetTerms(idField);
TermsEnum termsEnum2 = terms2.GetEnumerator();
DocsEnum termDocs1 = null;
DocsEnum termDocs2 = null;
while (termsEnum.MoveNext())
{
BytesRef term = termsEnum.Term;
//System.out.println("TEST: match id term=" + term);
termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
if (termsEnum2.SeekExact(term))
{
termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
}
else
{
termDocs2 = null;
}
if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
{
// this doc is deleted and wasn't replaced
Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
continue;
}
int id1 = termDocs1.DocID;
Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());
Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
int id2 = termDocs2.DocID;
Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());
r2r1[id2] = id1;
// verify stored fields are equivalent
try
{
VerifyEquals(r1.Document(id1), r2.Document(id2));
}
catch (Exception /*t*/)
{
Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
Console.WriteLine(" d1=" + r1.Document(id1));
Console.WriteLine(" d2=" + r2.Document(id2));
throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
}
try
{
// verify term vectors are equivalent
VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
}
catch (Exception /*e*/)
{
Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
Fields tv1 = r1.GetTermVectors(id1);
Console.WriteLine(" d1=" + tv1);
if (tv1 != null)
{
DocsAndPositionsEnum dpEnum = null;
DocsEnum dEnum = null;
foreach (string field in tv1)
{
Console.WriteLine(" " + field + ":");
Terms terms3 = tv1.GetTerms(field);
Assert.IsNotNull(terms3);
TermsEnum termsEnum3 = terms3.GetEnumerator();
while (termsEnum3.MoveNext())
{
Console.WriteLine(" " + termsEnum3.Term.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
if (dpEnum != null)
{
Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
int freq = dpEnum.Freq;
Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq);
for (int posUpto = 0; posUpto < freq; posUpto++)
{
Console.WriteLine(" pos=" + dpEnum.NextPosition());
}
}
else
{
dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
Assert.IsNotNull(dEnum);
Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
int freq = dEnum.Freq;
Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq);
}
}
}
}
Fields tv2 = r2.GetTermVectors(id2);
Console.WriteLine(" d2=" + tv2);
if (tv2 != null)
{
DocsAndPositionsEnum dpEnum = null;
DocsEnum dEnum = null;
foreach (string field in tv2)
{
Console.WriteLine(" " + field + ":");
Terms terms3 = tv2.GetTerms(field);
Assert.IsNotNull(terms3);
TermsEnum termsEnum3 = terms3.GetEnumerator();
while (termsEnum3.MoveNext())
{
Console.WriteLine(" " + termsEnum3.Term.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
if (dpEnum != null)
{
Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
int freq = dpEnum.Freq;
Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq);
for (int posUpto = 0; posUpto < freq; posUpto++)
{
Console.WriteLine(" pos=" + dpEnum.NextPosition());
}
}
else
{
dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
Assert.IsNotNull(dEnum);
Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
int freq = dEnum.Freq;
Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq);
}
}
}
}
throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
}
}
//System.out.println("TEST: done match id");
// Verify postings
//System.out.println("TEST: create te1");
Fields fields1 = MultiFields.GetFields(r1);
IEnumerator<string> fields1Enum = fields1.GetEnumerator();
Fields fields2 = MultiFields.GetFields(r2);
IEnumerator<string> fields2Enum = fields2.GetEnumerator();
string field1 = null, field2 = null;
TermsEnum termsEnum1 = null;
termsEnum2 = null;
DocsEnum docs1 = null, docs2 = null;
// pack both doc and freq into a single element for easy sorting
long[] info1 = new long[r1.NumDocs];
long[] info2 = new long[r2.NumDocs];
for (; ; )
{
BytesRef term1 = null, term2 = null;
// iterate until we get some docs
int len1;
for (; ; )
{
len1 = 0;
if (termsEnum1 == null)
{
if (!fields1Enum.MoveNext())
{
break;
}
field1 = fields1Enum.Current;
Terms terms = fields1.GetTerms(field1);
if (terms == null)
{
continue;
}
termsEnum1 = terms.GetEnumerator();
}
if (!termsEnum1.MoveNext())
{
term1 = null;
// no more terms in this field
termsEnum1 = null;
continue;
}
term1 = termsEnum1.Term;
//System.out.println("TEST: term1=" + term1);
docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
{
int d = docs1.DocID;
int f = docs1.Freq;
info1[len1] = (((long)d) << 32) | (uint)f;
len1++;
}
if (len1 > 0)
{
break;
}
}
// iterate until we get some docs
int len2;
for (; ; )
{
len2 = 0;
if (termsEnum2 == null)
{
if (!fields2Enum.MoveNext())
{
break;
}
field2 = fields2Enum.Current;
Terms terms = fields2.GetTerms(field2);
if (terms == null)
{
continue;
}
termsEnum2 = terms.GetEnumerator();
}
if (!termsEnum2.MoveNext())
{
term2 = null;
// no more terms in this field
termsEnum2 = null;
continue;
}
term2 = termsEnum2.Term;
//System.out.println("TEST: term1=" + term1);
docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
{
int d = r2r1[docs2.DocID];
int f = docs2.Freq;
info2[len2] = (((long)d) << 32) | (uint)f;
len2++;
}
if (len2 > 0)
{
break;
}
}
Assert.AreEqual(len1, len2);
if (len1 == 0) // no more terms
{
break;
}
Assert.AreEqual(field1, field2);
Assert.IsTrue(term1.BytesEquals(term2));
if (!hasDeletes)
{
Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
}
Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);
// sort info2 to get it into ascending docID order
Array.Sort(info2, 0, len2);
// now compare
for (int i = 0; i < len1; i++)
{
Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + (info1[i].TripleShift(32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + (info2[i].TripleShift(32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
}
}
}
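// Verifies two stored documents hold the same fields (compared in field-name order).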
public static void VerifyEquals(Document d1, Document d2)
{
IList<IIndexableField> ff1 = d1.Fields;
IList<IIndexableField> ff2 = d2.Fields;
ff1.Sort(fieldNameComparer);
ff2.Sort(fieldNameComparer);
Assert.AreEqual(ff1.Count, ff2.Count, ff1 + " : " + ff2);
for (int i = 0; i < ff1.Count; i++)
{
IIndexableField f1 = ff1[i];
IIndexableField f2 = ff2[i];
if (f1.GetBinaryValue() != null)
{
if (Debugging.AssertsEnabled) Debugging.Assert(f2.GetBinaryValue() != null);
}
else
{
string s1 = f1.GetStringValue();
string s2 = f2.GetStringValue();
Assert.AreEqual(s1, s2, ff1 + " : " + ff2);
}
}
}
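// Verifies two term vector Fields instances enumerate the same fields, terms, freqs,
// positions and (when present) offsets.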
public static void VerifyEquals(Fields d1, Fields d2)
{
if (d1 == null)
{
Assert.IsTrue(d2 == null || d2.Count == 0);
return;
}
Assert.IsTrue(d2 != null);
IEnumerator<string> fieldsEnum2 = d2.GetEnumerator();
foreach (string field1 in d1)
{
fieldsEnum2.MoveNext();
string field2 = fieldsEnum2.Current;
Assert.AreEqual(field1, field2);
Terms terms1 = d1.GetTerms(field1);
Assert.IsNotNull(terms1);
TermsEnum termsEnum1 = terms1.GetEnumerator();
Terms terms2 = d2.GetTerms(field2);
Assert.IsNotNull(terms2);
TermsEnum termsEnum2 = terms2.GetEnumerator();
DocsAndPositionsEnum dpEnum1 = null;
DocsAndPositionsEnum dpEnum2 = null;
DocsEnum dEnum1 = null;
DocsEnum dEnum2 = null;
BytesRef term1;
while (termsEnum1.MoveNext())
{
term1 = termsEnum1.Term;
termsEnum2.MoveNext();
BytesRef term2 = termsEnum2.Term;
Assert.AreEqual(term1, term2);
Assert.AreEqual(termsEnum1.TotalTermFreq, termsEnum2.TotalTermFreq);
dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
if (dpEnum1 != null)
{
Assert.IsNotNull(dpEnum2);
int docID1 = dpEnum1.NextDoc();
dpEnum2.NextDoc();
// docIDs are not required to be equal across the two readers
//int docID2 = dpEnum2.NextDoc();
//Assert.AreEqual(docID1, docID2);
Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
int freq1 = dpEnum1.Freq;
int freq2 = dpEnum2.Freq;
Assert.AreEqual(freq1, freq2);
IOffsetAttribute offsetAtt1 = dpEnum1.Attributes.HasAttribute<IOffsetAttribute>() ? dpEnum1.Attributes.GetAttribute<IOffsetAttribute>() : null;
IOffsetAttribute offsetAtt2 = dpEnum2.Attributes.HasAttribute<IOffsetAttribute>() ? dpEnum2.Attributes.GetAttribute<IOffsetAttribute>() : null;
if (offsetAtt1 != null)
{
Assert.IsNotNull(offsetAtt2);
}
else
{
Assert.IsNull(offsetAtt2);
}
for (int posUpto = 0; posUpto < freq1; posUpto++)
{
int pos1 = dpEnum1.NextPosition();
int pos2 = dpEnum2.NextPosition();
Assert.AreEqual(pos1, pos2);
if (offsetAtt1 != null)
{
Assert.AreEqual(offsetAtt1.StartOffset, offsetAtt2.StartOffset);
Assert.AreEqual(offsetAtt1.EndOffset, offsetAtt2.EndOffset);
}
}
Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
}
else
{
dEnum1 = TestUtil.Docs(Random, termsEnum1, null, dEnum1, DocsFlags.FREQS);
dEnum2 = TestUtil.Docs(Random, termsEnum2, null, dEnum2, DocsFlags.FREQS);
Assert.IsNotNull(dEnum1);
Assert.IsNotNull(dEnum2);
int docID1 = dEnum1.NextDoc();
dEnum2.NextDoc();
// docIDs are not required to be equal across the two readers
//int docID2 = dEnum2.NextDoc();
//Assert.AreEqual(docID1, docID2);
Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
int freq1 = dEnum1.Freq;
int freq2 = dEnum2.Freq;
Assert.AreEqual(freq1, freq2);
Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());
}
}
Assert.IsFalse(termsEnum2.MoveNext());
}
Assert.IsFalse(fieldsEnum2.MoveNext());
}
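// Worker thread that randomly adds, deletes-by-term and deletes-by-query documents,
// recording the expected surviving documents in its own docs map.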
internal class IndexingThread : ThreadJob
{
private readonly TestStressIndexing2 outerInstance;
public IndexingThread(TestStressIndexing2 outerInstance)
{
this.outerInstance = outerInstance;
}
internal IndexWriter w;
internal int @base;
internal int range;
internal int iterations;
internal IDictionary<string, Document> docs = new Dictionary<string, Document>();
internal Random r;
public virtual int NextInt(int lim)
{
return r.Next(lim);
}
// start is inclusive and end is exclusive
public virtual int NextInt(int start, int end)
{
return start + r.Next(end - start);
}
internal char[] buffer = new char[100];
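// Appends one random token (0-19 chars plus a trailing space) to the shared buffer,
// mixing ASCII, multi-byte code points and valid surrogate pairs; returns the new end offset.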
internal virtual int AddUTF8Token(int start)
{
int end = start + NextInt(20);
if (buffer.Length < 1 + end)
{
char[] newBuffer = new char[(int)((1 + end) * 1.25)];
Array.Copy(buffer, 0, newBuffer, 0, buffer.Length);
buffer = newBuffer;
}
for (int i = start; i < end; i++)
{
int t = NextInt(5);
if (0 == t && i < end - 1)
{
// Make a surrogate pair
// High surrogate
buffer[i++] = (char)NextInt(0xd800, 0xdc00);
// Low surrogate
buffer[i] = (char)NextInt(0xdc00, 0xe000);
}
else if (t <= 1)
{
buffer[i] = (char)NextInt(0x80);
}
else if (2 == t)
{
buffer[i] = (char)NextInt(0x80, 0x800);
}
else if (3 == t)
{
buffer[i] = (char)NextInt(0x800, 0xd800);
}
else if (4 == t)
{
buffer[i] = (char)NextInt(0xe000, 0xffff);
}
}
buffer[end] = ' ';
return 1 + end;
}
public virtual string GetString(int nTokens)
{
nTokens = nTokens != 0 ? nTokens : r.Next(4) + 1;
// Half the time make a random UTF8 string
if (r.NextBoolean())
{
return GetUTF8String(nTokens);
}
// build the char[] directly instead of using a StringBuilder (the Java original avoided StringBuffer's synchronization)
char[] arr = new char[nTokens * 2];
for (int i = 0; i < nTokens; i++)
{
arr[i * 2] = (char)('A' + r.Next(10));
arr[i * 2 + 1] = ' ';
}
return new string(arr);
}
public virtual string GetUTF8String(int nTokens)
{
int upto = 0;
Arrays.Fill(buffer, (char)0);
for (int i = 0; i < nTokens; i++)
{
upto = AddUTF8Token(upto);
}
return new string(buffer, 0, upto);
}
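// Picks a random id within this thread's [base, base + range) slice, so threads never
// collide on ids but repeatedly update/delete their own.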
public virtual string IdString => Convert.ToString(@base + NextInt(range), CultureInfo.InvariantCulture);
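// Builds a document with a fixed id field plus up to maxFields random fields (random
// store/index/term-vector options), then updates the writer and the expected-docs map.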
public virtual void IndexDoc()
{
Document d = new Document();
FieldType customType1 = new FieldType(TextField.TYPE_STORED);
customType1.IsTokenized = false;
customType1.OmitNorms = true;
List<Field> fields = new List<Field>();
string idString = IdString;
Field idField = NewField("id", idString, customType1);
fields.Add(idField);
int nFields = NextInt(maxFields);
for (int i = 0; i < nFields; i++)
{
FieldType customType = new FieldType();
switch (NextInt(4))
{
case 0:
break;
case 1:
customType.StoreTermVectors = true;
break;
case 2:
customType.StoreTermVectors = true;
customType.StoreTermVectorPositions = true;
break;
case 3:
customType.StoreTermVectors = true;
customType.StoreTermVectorOffsets = true;
break;
}
switch (NextInt(4))
{
case 0:
customType.IsStored = true;
customType.OmitNorms = true;
customType.IsIndexed = true;
fields.Add(NewField("f" + NextInt(100), GetString(1), customType));
break;
case 1:
customType.IsIndexed = true;
customType.IsTokenized = true;
fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
break;
case 2:
customType.IsStored = true;
customType.StoreTermVectors = false;
customType.StoreTermVectorOffsets = false;
customType.StoreTermVectorPositions = false;
fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
break;
case 3:
customType.IsStored = true;
customType.IsIndexed = true;
customType.IsTokenized = true;
fields.Add(NewField("f" + NextInt(100), GetString(bigFieldSize), customType));
break;
}
}
if (sameFieldOrder)
{
fields.Sort(fieldNameComparer);
}
else
{
// also randomize the placement of the id field
fields.Swap(NextInt(fields.Count), 0);
}
for (int i = 0; i < fields.Count; i++)
{
d.Add(fields[i]);
}
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString);
}
w.UpdateDocument(new Term("id", idString), d);
//System.out.println(Thread.currentThread().getName() + ": indexing "+d);
docs[idString] = d;
}
public virtual void DeleteDoc()
{
string idString = IdString;
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": del id:" + idString);
}
w.DeleteDocuments(new Term("id", idString));
docs.Remove(idString);
}
public virtual void DeleteByQuery()
{
string idString = IdString;
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": del query id:" + idString);
}
w.DeleteDocuments(new TermQuery(new Term("id", idString)));
docs.Remove(idString);
}
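// Main loop: ~5% delete-by-term, ~5% delete-by-query, ~90% add/update.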
public override void Run()
{
try
{
r = new Random(@base + range + seed);
for (int i = 0; i < iterations; i++)
{
int what = NextInt(100);
if (what < 5)
{
DeleteDoc();
}
else if (what < 10)
{
DeleteByQuery();
}
else
{
IndexDoc();
}
}
}
catch (Exception e)
{
Console.WriteLine(e.ToString());
Console.Write(e.StackTrace);
Assert.Fail(e.ToString());
}
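// Publish docs to the coordinating thread: the matching lock in IndexRandom/
// IndexRandomIWReader establishes the necessary memory barrier before docs is read.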
lock (this)
{
int dummy = docs.Count;
}
}
}
}
}