using J2N.Threading;
using Lucene.Net.Analysis;
using Lucene.Net.Codecs.Lucene42;
using Lucene.Net.Diagnostics;
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Threading;
using JCG = J2N.Collections.Generic;
using Assert = Lucene.Net.TestFramework.Assert;
using static Lucene.Net.Index.TermsEnum;
using J2N.Collections.Generic.Extensions;
#if TESTFRAMEWORK_MSTEST
using Test = Microsoft.VisualStudio.TestTools.UnitTesting.TestMethodAttribute;
#elif TESTFRAMEWORK_NUNIT
using Test = NUnit.Framework.TestAttribute;
#elif TESTFRAMEWORK_XUNIT
using Test = Lucene.Net.TestFramework.SkippableFactAttribute;
#endif
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Abstract class to do basic tests for a <see cref="Codecs.DocValuesFormat"/>.
/// NOTE: this test focuses on the docvalues impl, nothing else.
/// The [stretch] goal is for this test to be
/// so thorough in testing a new <see cref="Codecs.DocValuesFormat"/> that if this
/// test passes, then all Lucene/Solr tests should also pass. I.e.,
/// if there is some bug in a given <see cref="Codecs.DocValuesFormat"/> that this
/// test fails to catch, then this test needs to be improved!
/// </summary>
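/// <remarks>
/// A concrete fixture typically only supplies the codec under test. The sketch
/// below is illustrative rather than normative: it assumes the codec-supplying
/// override inherited from <see cref="BaseIndexFileFormatTestCase"/> (shown here
/// as GetCodec()) and a hypothetical MyDocValuesFormat.
/// <code>
/// public class TestMyDocValuesFormat : BaseDocValuesFormatTestCase
/// {
///     // hypothetical format under test; AlwaysDocValuesFormat wraps it in the default codec
///     private readonly Codec codec = TestUtil.AlwaysDocValuesFormat(new MyDocValuesFormat());
///
///     protected override Codec GetCodec() => codec;
/// }
/// </code>
/// </remarks>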
public abstract class BaseDocValuesFormatTestCase : BaseIndexFileFormatTestCase
#if TESTFRAMEWORK_XUNIT
, Xunit.IClassFixture<BeforeAfterClass>
{
public BaseDocValuesFormatTestCase(BeforeAfterClass beforeAfter)
: base(beforeAfter)
{
}
#else
{
#endif
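/// <summary>
/// Populates <paramref name="doc"/> with representative doc values fields
/// (numeric, binary, sorted, and, when the codec supports it, sorted set) so
/// the shared checks in the base class also exercise this codec's doc values.
/// </summary>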
protected override void AddRandomFields(Document doc)
{
if (Usually())
{
doc.Add(new NumericDocValuesField("ndv", Random.Next(1 << 12)));
doc.Add(new BinaryDocValuesField("bdv", new BytesRef(TestUtil.RandomSimpleString(Random))));
doc.Add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.RandomSimpleString(Random, 2))));
}
if (DefaultCodecSupportsSortedSet)
{
int numValues = Random.Next(5);
for (int i = 0; i < numValues; ++i)
{
doc.Add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.RandomSimpleString(Random, 2))));
}
}
}
[Test]
public virtual void TestOneNumber()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new NumericDocValuesField("dv", 5));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv");
Assert.AreEqual(5L, dv.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - 5L required because types don't match (xUnit checks this)
}
}
[Test]
public virtual void TestOneSingle() // LUCENENET specific - renamed from TestOneFloat
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new SingleDocValuesField("dv", 5.7f));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv");
Assert.AreEqual((long)J2N.BitConversion.SingleToRawInt32Bits(5.7f), dv.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - cast required because types don't match (xUnit checks this)
}
}
[Test]
public virtual void TestTwoNumbers()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new NumericDocValuesField("dv1", 5));
doc.Add(new NumericDocValuesField("dv2", 17));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
Assert.AreEqual(5L, dv.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - 5L required because types don't match (xUnit checks this)
dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv2");
Assert.AreEqual(17L, dv.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - 17L required because types don't match (xUnit checks this)
}
}
[Test]
public virtual void TestTwoBinaryValues()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv1", new BytesRef(longTerm)));
doc.Add(new BinaryDocValuesField("dv2", new BytesRef(text)));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
BytesRef scratch = new BytesRef(); // LUCENENET: Moved this outside of the loop for performance
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv1");
dv.Get(hits.ScoreDocs[i].Doc, scratch);
Assert.AreEqual(new BytesRef(longTerm), scratch);
dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
dv.Get(hits.ScoreDocs[i].Doc, scratch);
Assert.AreEqual(new BytesRef(text), scratch);
}
}
[Test]
public virtual void TestTwoFieldsMixed()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new NumericDocValuesField("dv1", 5));
doc.Add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
Assert.AreEqual(5L, dv.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - 5L required because types don't match (xUnit checks this)
BinaryDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
dv2.Get(hits.ScoreDocs[i].Doc, scratch);
Assert.AreEqual(new BytesRef("hello world"), scratch);
}
}
[Test]
public virtual void TestThreeFieldsMixed()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new SortedDocValuesField("dv1", new BytesRef("hello hello")));
doc.Add(new NumericDocValuesField("dv2", 5));
doc.Add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv1");
int ord = dv.GetOrd(0);
dv.LookupOrd(ord, scratch);
Assert.AreEqual(new BytesRef("hello hello"), scratch);
NumericDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv2");
Assert.AreEqual(5L, dv2.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - 5L required because types don't match (xUnit checks this)
BinaryDocValues dv3 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv3");
dv3.Get(hits.ScoreDocs[i].Doc, scratch);
Assert.AreEqual(new BytesRef("hello world"), scratch);
}
}
[Test]
public virtual void TestThreeFieldsMixed2()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
using Directory directory = NewDirectory();
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv1", new BytesRef("hello world")));
doc.Add(new SortedDocValuesField("dv2", new BytesRef("hello hello")));
doc.Add(new NumericDocValuesField("dv3", 5));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv2");
int ord = dv.GetOrd(0);
dv.LookupOrd(ord, scratch);
Assert.AreEqual(new BytesRef("hello hello"), scratch);
NumericDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv3");
Assert.AreEqual(5L, dv2.Get(hits.ScoreDocs[i].Doc)); // LUCENENET specific - 5L required because types don't match (xUnit checks this)
BinaryDocValues dv3 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv1");
dv3.Get(hits.ScoreDocs[i].Doc, scratch);
Assert.AreEqual(new BytesRef("hello world"), scratch);
}
}
[Test]
public virtual void TestTwoDocumentsNumeric()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new NumericDocValuesField("dv", 1));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new NumericDocValuesField("dv", 2));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv");
Assert.AreEqual(1L, dv.Get(0)); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(2L, dv.Get(1)); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
}
[Test]
public virtual void TestTwoDocumentsMerged()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(NewField("id", "0", StringField.TYPE_STORED));
doc.Add(new NumericDocValuesField("dv", -10));
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
doc.Add(NewField("id", "1", StringField.TYPE_STORED));
doc.Add(new NumericDocValuesField("dv", 99));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv");
for (int i = 0; i < 2; i++)
{
Document doc2 = ((AtomicReader)ireader.Leaves[0].Reader).Document(i);
long expected;
if (doc2.Get("id").Equals("0", StringComparison.Ordinal))
{
expected = -10;
}
else
{
expected = 99;
}
Assert.AreEqual(expected, dv.Get(i));
}
}
[Test]
public virtual void TestBigNumericRange()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new NumericDocValuesField("dv", long.MinValue));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new NumericDocValuesField("dv", long.MaxValue));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv");
Assert.AreEqual(long.MinValue, dv.Get(0));
Assert.AreEqual(long.MaxValue, dv.Get(1));
}
[Test]
public virtual void TestBigNumericRange2()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new NumericDocValuesField("dv", -8841491950446638677L));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new NumericDocValuesField("dv", 9062230939892376225L));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory);
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv");
Assert.AreEqual(-8841491950446638677L, dv.Get(0));
Assert.AreEqual(9062230939892376225L, dv.Get(1));
}
[Test]
public virtual void TestBytes()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv", new BytesRef("hello world")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
dv.Get(hits.ScoreDocs[i].Doc, scratch);
Assert.AreEqual(new BytesRef("hello world"), scratch);
}
}
[Test]
public virtual void TestBytesTwoDocumentsMerged()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(NewField("id", "0", StringField.TYPE_STORED));
doc.Add(new BinaryDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
doc.Add(NewField("id", "1", StringField.TYPE_STORED));
doc.Add(new BinaryDocValuesField("dv", new BytesRef("hello 2")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
for (int i = 0; i < 2; i++)
{
Document doc2 = ((AtomicReader)ireader.Leaves[0].Reader).Document(i);
string expected;
if (doc2.Get("id").Equals("0", StringComparison.Ordinal))
{
expected = "hello world 1";
}
else
{
expected = "hello 2";
}
dv.Get(i, scratch);
Assert.AreEqual(expected, scratch.Utf8ToString());
}
}
[Test]
public virtual void TestSortedBytes()
{
string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
string text = "this is the text to be indexed. " + longTerm;
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(NewTextField("fieldname", text, Field.Store.YES));
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.Search(query, null, 1);
Assert.AreEqual(1, hits.TotalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
Assert.AreEqual(text, hitDoc.Get("fieldname"));
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
dv.LookupOrd(dv.GetOrd(hits.ScoreDocs[i].Doc), scratch);
Assert.AreEqual(new BytesRef("hello world"), scratch);
}
}
[Test]
public virtual void TestSortedBytesTwoDocuments()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.LookupOrd(dv.GetOrd(0), scratch);
Assert.AreEqual("hello world 1", scratch.Utf8ToString());
dv.LookupOrd(dv.GetOrd(1), scratch);
Assert.AreEqual("hello world 2", scratch.Utf8ToString());
}
[Test]
public virtual void TestSortedBytesThreeDocuments()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
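// three documents were indexed, but "hello world 1" appears twice: the sorted
// dictionary deduplicates, so only two ords exist and docs 0 and 2 share ord 0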
Assert.AreEqual(2, dv.ValueCount);
BytesRef scratch = new BytesRef();
Assert.AreEqual(0, dv.GetOrd(0));
dv.LookupOrd(0, scratch);
Assert.AreEqual("hello world 1", scratch.Utf8ToString());
Assert.AreEqual(1, dv.GetOrd(1));
dv.LookupOrd(1, scratch);
Assert.AreEqual("hello world 2", scratch.Utf8ToString());
Assert.AreEqual(0, dv.GetOrd(2));
}
[Test]
public virtual void TestSortedBytesTwoDocumentsMerged()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(NewField("id", "0", StringField.TYPE_STORED));
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
doc.Add(NewField("id", "1", StringField.TYPE_STORED));
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
Assert.AreEqual(2, dv.ValueCount); // 2 ords
BytesRef scratch = new BytesRef();
dv.LookupOrd(0, scratch);
Assert.AreEqual(new BytesRef("hello world 1"), scratch);
dv.LookupOrd(1, scratch);
Assert.AreEqual(new BytesRef("hello world 2"), scratch);
for (int i = 0; i < 2; i++)
{
Document doc2 = ((AtomicReader)ireader.Leaves[0].Reader).Document(i);
string expected;
if (doc2.Get("id").Equals("0", StringComparison.Ordinal))
{
expected = "hello world 1";
}
else
{
expected = "hello world 2";
}
dv.LookupOrd(dv.GetOrd(i), scratch);
Assert.AreEqual(expected, scratch.Utf8ToString());
}
}
[Test]
public virtual void TestSortedMergeAwayAllValues()
{
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.NO));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.NO));
doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
iwriter.Commit();
iwriter.DeleteDocuments(new Term("id", "1"));
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedDocValues dv = GetOnlySegmentReader(ireader).GetSortedDocValues("field");
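// codecs that track which docs have a value report the merged-away value as
// ord -1 with an empty dictionary; older codecs surface a single empty value instead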
if (DefaultCodecSupportsDocsWithField)
{
Assert.AreEqual(-1, dv.GetOrd(0));
Assert.AreEqual(0, dv.ValueCount);
}
else
{
Assert.AreEqual(0, dv.GetOrd(0));
Assert.AreEqual(1, dv.ValueCount);
BytesRef @ref = new BytesRef();
dv.LookupOrd(0, @ref);
Assert.AreEqual(new BytesRef(), @ref);
}
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestBytesWithNewline()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new BinaryDocValuesField("dv", new BytesRef("hello\nworld\r1")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.Get(0, scratch);
Assert.AreEqual(new BytesRef("hello\nworld\r1"), scratch);
}
[Test]
public virtual void TestMissingSortedBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.AddDocument(doc);
// 2nd doc missing the DV field
iwriter.AddDocument(new Document());
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory);
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.LookupOrd(dv.GetOrd(0), scratch);
Assert.AreEqual(new BytesRef("hello world 2"), scratch);
if (DefaultCodecSupportsDocsWithField)
{
Assert.AreEqual(-1, dv.GetOrd(1));
}
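// regardless of missing-value support, Get() on a doc without a value yields an empty BytesRef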
dv.Get(1, scratch);
Assert.AreEqual(new BytesRef(""), scratch);
}
[Test]
public virtual void TestSortedTermsEnum()
{
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("field", new BytesRef("world")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("field", new BytesRef("beer")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedDocValues dv = GetOnlySegmentReader(ireader).GetSortedDocValues("field");
Assert.AreEqual(3, dv.ValueCount);
TermsEnum termsEnum = dv.GetTermsEnum();
// next()
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
Assert.AreEqual(2L, termsEnum.Ord); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
// seekCeil()
Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));
// seekExact()
Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
Assert.AreEqual(2L, termsEnum.Ord); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));
// seek(ord)
termsEnum.SeekExact(0);
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
termsEnum.SeekExact(1);
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
termsEnum.SeekExact(2);
Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
Assert.AreEqual(2L, termsEnum.Ord); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestEmptySortedBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory);
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
SortedDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
BytesRef scratch = new BytesRef();
Assert.AreEqual(0, dv.GetOrd(0));
Assert.AreEqual(0, dv.GetOrd(1));
dv.LookupOrd(dv.GetOrd(0), scratch);
Assert.AreEqual("", scratch.Utf8ToString());
}
[Test]
public virtual void TestEmptyBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new BinaryDocValuesField("dv", new BytesRef("")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new BinaryDocValuesField("dv", new BytesRef("")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory);
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.Get(0, scratch);
Assert.AreEqual("", scratch.Utf8ToString());
dv.Get(1, scratch);
Assert.AreEqual("", scratch.Utf8ToString());
}
[Test]
public virtual void TestVeryLargeButLegalBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
var bytes = new byte[32766];
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
Random.NextBytes(bytes); // safe to fill after wrapping: the BytesRef references this same array
doc.Add(new BinaryDocValuesField("dv", b));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.Get(0, scratch);
Assert.AreEqual(new BytesRef(bytes), scratch);
}
[Test]
public virtual void TestVeryLargeButLegalSortedBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
var bytes = new byte[32766];
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
Random.NextBytes(bytes); // safe to fill after wrapping: the BytesRef references this same array
doc.Add(new SortedDocValuesField("dv", b));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.Get(0, scratch);
Assert.AreEqual(new BytesRef(bytes), scratch);
}
[Test]
public virtual void TestCodecUsesOwnBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new BinaryDocValuesField("dv", new BytesRef("boo!")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
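// hand the codec a caller-owned buffer: a correct impl must swap in its own
// backing array (or a copy) rather than writing into ours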
var mybytes = new byte[20];
BytesRef scratch = new BytesRef(mybytes);
dv.Get(0, scratch);
Assert.AreEqual("boo!", scratch.Utf8ToString());
Assert.IsFalse(scratch.Bytes == mybytes);
}
[Test]
public virtual void TestCodecUsesOwnSortedBytes()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("boo!")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
var mybytes = new byte[20];
BytesRef scratch = new BytesRef(mybytes);
dv.Get(0, scratch);
Assert.AreEqual("boo!", scratch.Utf8ToString());
Assert.IsFalse(scratch.Bytes == mybytes);
}
[Test]
public virtual void TestCodecUsesOwnBytesEachTime()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv");
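// values fetched into different scratch refs must stay independent: reading
// doc 1 must not invalidate the bytes previously returned for doc 0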
BytesRef scratch = new BytesRef();
dv.Get(0, scratch);
Assert.AreEqual("foo!", scratch.Utf8ToString());
BytesRef scratch2 = new BytesRef();
dv.Get(1, scratch2);
Assert.AreEqual("bar!", scratch2.Utf8ToString());
// check scratch is still valid
Assert.AreEqual("foo!", scratch.Utf8ToString());
}
[Test]
public virtual void TestCodecUsesOwnSortedBytesEachTime()
{
Analyzer analyzer = new MockAnalyzer(Random);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("foo!")));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedDocValuesField("dv", new BytesRef("bar!")));
iwriter.AddDocument(doc);
} // iwriter.Dispose();
// Now search the index:
using IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
if (Debugging.AssertsEnabled) Debugging.Assert(ireader.Leaves.Count == 1);
BinaryDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.Get(0, scratch);
Assert.AreEqual("foo!", scratch.Utf8ToString());
BytesRef scratch2 = new BytesRef();
dv.Get(1, scratch2);
Assert.AreEqual("bar!", scratch2.Utf8ToString());
// check scratch is still valid
Assert.AreEqual("foo!", scratch.Utf8ToString());
}
/// <summary>
/// Simple test case to show how to use the API.
/// </summary>
[Test]
public virtual void TestDocValuesSimple()
{
using Directory dir = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.SetMergePolicy(NewLogMergePolicy());
using (IndexWriter writer = new IndexWriter(dir, conf))
{
for (int i = 0; i < 5; i++)
{
Document doc = new Document();
doc.Add(new NumericDocValuesField("docId", i));
doc.Add(new TextField("docId", "" + i, Field.Store.NO));
writer.AddDocument(doc);
}
writer.Commit();
writer.ForceMerge(1, true);
} // writer.Dispose();
using DirectoryReader reader = DirectoryReader.Open(dir, 1);
Assert.AreEqual(1, reader.Leaves.Count);
IndexSearcher searcher = new IndexSearcher(reader);
BooleanQuery query = new BooleanQuery();
query.Add(new TermQuery(new Term("docId", "0")), Occur.SHOULD);
query.Add(new TermQuery(new Term("docId", "1")), Occur.SHOULD);
query.Add(new TermQuery(new Term("docId", "2")), Occur.SHOULD);
query.Add(new TermQuery(new Term("docId", "3")), Occur.SHOULD);
query.Add(new TermQuery(new Term("docId", "4")), Occur.SHOULD);
TopDocs search = searcher.Search(query, 10);
Assert.AreEqual(5, search.TotalHits);
ScoreDoc[] scoreDocs = search.ScoreDocs;
NumericDocValues docValues = GetOnlySegmentReader(reader).GetNumericDocValues("docId");
for (int i = 0; i < scoreDocs.Length; i++)
{
Assert.AreEqual(i, scoreDocs[i].Doc);
Assert.AreEqual((long)i, docValues.Get(scoreDocs[i].Doc)); // LUCENENET specific - cast required because types don't match (xUnit checks this)
}
}
[Test]
public virtual void TestRandomSortedBytes()
{
using Directory dir = NewDirectory();
IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
if (!DefaultCodecSupportsDocsWithField)
{
// if the codec doesn't support missing, we expect missing to be mapped to byte[]
// by the impersonator, but we have to give it a chance to merge them to this
cfg.SetMergePolicy(NewLogMergePolicy());
}
using RandomIndexWriter w = new RandomIndexWriter(Random, dir, cfg);
int numDocs = AtLeast(100);
BytesRefHash hash = new BytesRefHash();
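// BytesRefHash doubles as the test oracle: it dedupes every indexed value and
// can later hand them back in sorted order for ord-by-ord comparison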
IDictionary<string, string> docToString = new Dictionary<string, string>();
int maxLength = TestUtil.NextInt32(Random, 1, 50);
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
doc.Add(NewTextField("id", "" + i, Field.Store.YES));
string @string = TestUtil.RandomRealisticUnicodeString(Random, 1, maxLength);
BytesRef br = new BytesRef(@string);
doc.Add(new SortedDocValuesField("field", br));
hash.Add(br);
docToString["" + i] = @string;
w.AddDocument(doc);
}
if (Rarely())
{
w.Commit();
}
int numDocsNoValue = AtLeast(10);
for (int i = 0; i < numDocsNoValue; i++)
{
Document doc = new Document();
doc.Add(NewTextField("id", "noValue", Field.Store.YES));
w.AddDocument(doc);
}
if (!DefaultCodecSupportsDocsWithField)
{
BytesRef bytesRef = new BytesRef();
hash.Add(bytesRef); // add empty value for the gaps
}
if (Rarely())
{
w.Commit();
}
if (!DefaultCodecSupportsDocsWithField)
{
// if the codec doesn't support missing, we expect missing to be mapped to byte[]
// by the impersonator, but we have to give it a chance to merge them to this
w.ForceMerge(1);
}
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
string id = "" + i + numDocs; // string concatenation, not addition: ids stay disjoint from the first batch
doc.Add(NewTextField("id", id, Field.Store.YES));
string @string = TestUtil.RandomRealisticUnicodeString(Random, 1, maxLength);
BytesRef br = new BytesRef(@string);
hash.Add(br);
docToString[id] = @string;
doc.Add(new SortedDocValuesField("field", br));
w.AddDocument(doc);
}
w.Commit();
using IndexReader reader = w.GetReader();
SortedDocValues docValues = MultiDocValues.GetSortedValues(reader, "field");
int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
BytesRef expected = new BytesRef();
BytesRef actual = new BytesRef();
Assert.AreEqual(hash.Count, docValues.ValueCount);
for (int i = 0; i < hash.Count; i++)
{
hash.Get(sort[i], expected);
docValues.LookupOrd(i, actual);
Assert.AreEqual(expected.Utf8ToString(), actual.Utf8ToString());
int ord = docValues.LookupTerm(expected);
Assert.AreEqual(i, ord);
}
AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(reader);
foreach (KeyValuePair<string, string> entry in docToString)
{
// pk lookup
DocsEnum termDocsEnum = slowR.GetTermDocsEnum(new Term("id", entry.Key));
int docId = termDocsEnum.NextDoc();
expected = new BytesRef(entry.Value);
docValues.Get(docId, actual);
Assert.AreEqual(expected, actual);
}
}
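/// <summary>
/// Supplies the next <see cref="long"/> for the randomized
/// numerics-vs-stored-fields tests below.
/// </summary>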
internal abstract class Int64Producer
{
internal abstract long Next();
}
private void DoTestNumericsVsStoredFields(long minValue, long maxValue)
{
DoTestNumericsVsStoredFields(new Int64ProducerAnonymousClass(minValue, maxValue));
}
private class Int64ProducerAnonymousClass : Int64Producer
{
private readonly long minValue;
private readonly long maxValue;
public Int64ProducerAnonymousClass(long minValue, long maxValue)
{
this.minValue = minValue;
this.maxValue = maxValue;
}
internal override long Next()
{
return TestUtil.NextInt64(Random, minValue, maxValue);
}
}
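// Writes each random value twice (stored field + numeric doc values), with random
// commits and deletes mixed in, then verifies the doc values against the stored copy.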
private static void DoTestNumericsVsStoredFields(Int64Producer longs) // LUCENENET: CA1822: Mark members as static
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = NewStringField("stored", "", Field.Store.YES);
Field dvField = new NumericDocValuesField("dv", 0);
doc.Add(idField);
doc.Add(storedField);
doc.Add(dvField);
// index some docs
int numDocs = AtLeast(300);
// numDocs should always be > 256 so that, in case of a codec that optimizes
// for value counts <= 256, all storage layouts are tested
if (Debugging.AssertsEnabled) Debugging.Assert(numDocs > 256);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
long value = longs.Next();
storedField.SetStringValue(Convert.ToString(value, CultureInfo.InvariantCulture));
dvField.SetInt64Value(value);
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.ForceMerge(numDocs / 256);
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
NumericDocValues docValues = r.GetNumericDocValues("dv");
for (int i = 0; i < r.MaxDoc; i++)
{
long storedValue = Convert.ToInt64(r.Document(i).Get("stored"), CultureInfo.InvariantCulture);
Assert.AreEqual(storedValue, docValues.Get(i));
}
}
}
private void DoTestMissingVsFieldCache(long minValue, long maxValue)
{
DoTestMissingVsFieldCache(new Int64ProducerAnonymousClass2(minValue, maxValue));
}
private class Int64ProducerAnonymousClass2 : Int64Producer
{
private readonly long minValue;
private readonly long maxValue;
public Int64ProducerAnonymousClass2(long minValue, long maxValue)
{
this.minValue = minValue;
this.maxValue = maxValue;
}
internal override long Next()
{
return TestUtil.NextInt64(Random, minValue, maxValue);
}
}
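// Cross-checks the docsWithField bits that FieldCache derives from an indexed
// field against the bits it derives from the doc values field.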
private static void DoTestMissingVsFieldCache(Int64Producer longs) // LUCENENET: CA1822: Mark members as static
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = NewStringField("indexed", "", Field.Store.NO);
Field dvField = new NumericDocValuesField("dv", 0);
// index some docs
int numDocs = AtLeast(300);
// numDocs should always be > 256 so that, in case of a codec that optimizes
// for value counts <= 256, all storage layouts are tested
if (Debugging.AssertsEnabled) Debugging.Assert(numDocs > 256);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
long value = longs.Next();
indexedField.SetStringValue(Convert.ToString(value, CultureInfo.InvariantCulture));
dvField.SetInt64Value(value);
Document doc = new Document();
doc.Add(idField);
// 1/4 of the time we neglect to add the fields
if (Random.Next(4) > 0)
{
doc.Add(indexedField);
doc.Add(dvField);
}
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.ForceMerge(numDocs / 256);
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
foreach (var context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
IBits expected = FieldCache.DEFAULT.GetDocsWithField(r, "indexed");
IBits actual = FieldCache.DEFAULT.GetDocsWithField(r, "dv");
AssertEquals(expected, actual);
}
}
[Test]
public virtual void TestBooleanNumericsVsStoredFields()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestNumericsVsStoredFields(0, 1);
}
}
[Test]
public virtual void TestByteNumericsVsStoredFields()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestNumericsVsStoredFields(sbyte.MinValue, sbyte.MaxValue);
}
}
[Test]
public virtual void TestByteMissingVsFieldCache()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestMissingVsFieldCache(sbyte.MinValue, sbyte.MaxValue);
}
}
[Test]
public virtual void TestInt16NumericsVsStoredFields() // LUCENENET specific - renamed from TestShortNumericsVsStoredFields
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestNumericsVsStoredFields(short.MinValue, short.MaxValue);
}
}
[Test]
public virtual void TestInt16MissingVsFieldCache() // LUCENENET specific - renamed from TestShortMissingVsFieldCache
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestMissingVsFieldCache(short.MinValue, short.MaxValue);
}
}
[Test]
public virtual void TestInt32NumericsVsStoredFields() // LUCENENET specific - renamed from TestIntNumericsVsStoredFields
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestNumericsVsStoredFields(int.MinValue, int.MaxValue);
}
}
[Test]
public virtual void TestInt32MissingVsFieldCache() // LUCENENET specific - renamed from TestIntMissingVsFieldCache
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestMissingVsFieldCache(int.MinValue, int.MaxValue);
}
}
[Test]
public virtual void TestInt64NumericsVsStoredFields() // LUCENENET specific - renamed from TestLongNumericsVsStoredFields
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestNumericsVsStoredFields(long.MinValue, long.MaxValue);
}
}
[Test]
public virtual void TestInt64MissingVsFieldCache() // LUCENENET specific - renamed from TestLongMissingVsFieldCache
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestMissingVsFieldCache(long.MinValue, long.MaxValue);
}
}
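// Binary variant of the stored-fields oracle test: random byte[] values with
// lengths in [minLength, maxLength], stored once and doc-valued once.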
private void DoTestBinaryVsStoredFields(int minLength, int maxLength)
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", Arrays.Empty<byte>());
Field dvField = new BinaryDocValuesField("dv", new BytesRef());
doc.Add(idField);
doc.Add(storedField);
doc.Add(dvField);
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
int length;
if (minLength == maxLength)
{
length = minLength; // fixed length
}
else
{
length = TestUtil.NextInt32(Random, minLength, maxLength);
}
var buffer = new byte[length];
Random.NextBytes(buffer);
storedField.SetBytesValue(new BytesRef(buffer));
dvField.SetBytesValue(new BytesRef(buffer));
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
BytesRef scratch = new BytesRef(); // LUCENENET: Moved outside of the loop for performance
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
BinaryDocValues docValues = r.GetBinaryDocValues("dv");
for (int i = 0; i < r.MaxDoc; i++)
{
BytesRef binaryValue = r.Document(i).GetBinaryValue("stored");
docValues.Get(i, scratch);
Assert.AreEqual(binaryValue, scratch);
}
}
}
[Test]
public virtual void TestBinaryFixedLengthVsStoredFields()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
int fixedLength = TestUtil.NextInt32(Random, 0, 10);
DoTestBinaryVsStoredFields(fixedLength, fixedLength);
}
}
[Test]
public virtual void TestBinaryVariableLengthVsStoredFields()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestBinaryVsStoredFields(0, 10);
}
}
private void DoTestSortedVsStoredFields(int minLength, int maxLength)
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", Arrays.Empty<byte>());
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.Add(idField);
doc.Add(storedField);
doc.Add(dvField);
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
int length;
if (minLength == maxLength)
{
length = minLength; // fixed length
}
else
{
length = TestUtil.NextInt32(Random, minLength, maxLength);
}
var buffer = new byte[length];
Random.NextBytes(buffer);
storedField.SetBytesValue(new BytesRef(buffer));
dvField.SetBytesValue(new BytesRef(buffer));
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
BytesRef scratch = new BytesRef(); // LUCENENET: Moved outside of the loop for performance
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
BinaryDocValues docValues = r.GetSortedDocValues("dv");
for (int i = 0; i < r.MaxDoc; i++)
{
BytesRef binaryValue = r.Document(i).GetBinaryValue("stored");
docValues.Get(i, scratch);
Assert.AreEqual(binaryValue, scratch);
}
}
}
private void DoTestSortedVsFieldCache(int minLength, int maxLength)
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = new StringField("indexed", "", Field.Store.NO);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.Add(idField);
doc.Add(indexedField);
doc.Add(dvField);
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
int length;
if (minLength == maxLength)
{
length = minLength; // fixed length
}
else
{
length = TestUtil.NextInt32(Random, minLength, maxLength);
}
string value = TestUtil.RandomSimpleString(Random, length);
indexedField.SetStringValue(value);
dvField.SetBytesValue(new BytesRef(value));
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
SortedDocValues expected = FieldCache.DEFAULT.GetTermsIndex(r, "indexed");
SortedDocValues actual = r.GetSortedDocValues("dv");
AssertEquals(r.MaxDoc, expected, actual);
}
}
[Test]
public virtual void TestSortedFixedLengthVsStoredFields()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
int fixedLength = TestUtil.NextInt32(Random, 1, 10);
DoTestSortedVsStoredFields(fixedLength, fixedLength);
}
}
[Test]
public virtual void TestSortedFixedLengthVsFieldCache()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
int fixedLength = TestUtil.NextInt32(Random, 1, 10);
DoTestSortedVsFieldCache(fixedLength, fixedLength);
}
}
[Test]
public virtual void TestSortedVariableLengthVsFieldCache()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestSortedVsFieldCache(1, 10);
}
}
[Test]
public virtual void TestSortedVariableLengthVsStoredFields()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestSortedVsStoredFields(1, 10);
}
}
[Test]
public virtual void TestSortedSetOneValue()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoFields()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.Add(new SortedSetDocValuesField("field2", new BytesRef("world")));
iwriter.AddDocument(doc);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field2");
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("world"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoDocumentsMerged()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("world")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(2L, dv.ValueCount); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
dv.SetDocument(1);
Assert.AreEqual(1L, dv.NextOrd()); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
dv.LookupOrd(1, bytes);
Assert.AreEqual(new BytesRef("world"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoValues()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("world")));
iwriter.AddDocument(doc);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(1L, dv.NextOrd()); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
dv.LookupOrd(1, bytes);
Assert.AreEqual(new BytesRef("world"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoValuesUnordered()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("world")));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(1L, dv.NextOrd()); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
dv.LookupOrd(1, bytes);
Assert.AreEqual(new BytesRef("world"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetThreeValuesTwoDocs()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("world")));
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("beer")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(3L, dv.ValueCount); // LUCENENET specific - 3L required because types don't match (xUnit checks this)
dv.SetDocument(0);
Assert.AreEqual(1L, dv.NextOrd()); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(2L, dv.NextOrd()); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
dv.SetDocument(1);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(1L, dv.NextOrd()); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("beer"), bytes);
dv.LookupOrd(1, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
dv.LookupOrd(2, bytes);
Assert.AreEqual(new BytesRef("world"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoDocumentsLastMissing()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
doc = new Document();
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(1L, dv.ValueCount); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoDocumentsLastMissingMerge()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(1L, dv.ValueCount); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
dv.SetDocument(0);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoDocumentsFirstMissing()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(1L, dv.ValueCount); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
dv.SetDocument(1);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTwoDocumentsFirstMissingMerge()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
iwriter.AddDocument(doc);
iwriter.Commit();
doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(1L, dv.ValueCount); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
dv.SetDocument(1);
Assert.AreEqual(0L, dv.NextOrd()); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
BytesRef bytes = new BytesRef();
dv.LookupOrd(0, bytes);
Assert.AreEqual(new BytesRef("hello"), bytes);
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetMergeAwayAllValues()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.NO));
iwriter.AddDocument(doc);
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.NO));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.AddDocument(doc);
iwriter.Commit();
iwriter.DeleteDocuments(new Term("id", "1"));
iwriter.ForceMerge(1);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(0L, dv.ValueCount); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
}
finally
{
ireader?.Dispose();
}
}
[Test]
public virtual void TestSortedSetTermsEnum()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory directory = NewDirectory();
Analyzer analyzer = new MockAnalyzer(Random);
IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.SetMergePolicy(NewLogMergePolicy());
DirectoryReader ireader = null;
try
{
using (RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig))
{
Document doc = new Document();
doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("world")));
doc.Add(new SortedSetDocValuesField("field", new BytesRef("beer")));
iwriter.AddDocument(doc);
ireader = iwriter.GetReader();
} // iwriter.Dispose();
SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
Assert.AreEqual(3L, dv.ValueCount); // LUCENENET specific - 3L required because types don't match (xUnit checks this)
TermsEnum termsEnum = dv.GetTermsEnum();
// MoveNext()
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
Assert.AreEqual(2L, termsEnum.Ord); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
// SeekCeil()
Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));
// SeekExact()
Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
Assert.AreEqual(2L, termsEnum.Ord); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));
// SeekExact(ord)
termsEnum.SeekExact(0);
Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
Assert.AreEqual(0L, termsEnum.Ord); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
termsEnum.SeekExact(1);
Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
Assert.AreEqual(1L, termsEnum.Ord); // LUCENENET specific - 1L required because types don't match (xUnit checks this)
termsEnum.SeekExact(2);
Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
Assert.AreEqual(2L, termsEnum.Ord); // LUCENENET specific - 2L required because types don't match (xUnit checks this)
}
finally
{
ireader?.Dispose();
}
}
private void DoTestSortedSetVsStoredFields(int minLength, int maxLength, int maxValuesPerDoc)
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
Field idField = new StringField("id", Convert.ToString(i, CultureInfo.InvariantCulture), Field.Store.NO);
doc.Add(idField);
int length;
if (minLength == maxLength)
{
length = minLength; // fixed length
}
else
{
length = TestUtil.NextInt32(Random, minLength, maxLength);
}
int numValues = TestUtil.NextInt32(Random, 0, maxValuesPerDoc);
// create a random set of strings
// LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
JCG.SortedSet<string> values = new JCG.SortedSet<string>(StringComparer.Ordinal);
for (int v = 0; v < numValues; v++)
{
values.Add(TestUtil.RandomSimpleString(Random, length));
}
// add ordered to the stored field
foreach (string v in values)
{
doc.Add(new StoredField("stored", v));
}
// add in any order to the dv field
IList<string> unordered = new List<string>(values);
unordered.Shuffle(Random);
foreach (string v in unordered)
{
doc.Add(new SortedSetDocValuesField("dv", new BytesRef(v)));
}
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
SortedSetDocValues docValues = r.GetSortedSetDocValues("dv");
BytesRef scratch = new BytesRef();
for (int i = 0; i < r.MaxDoc; i++)
{
string[] stringValues = r.Document(i).GetValues("stored");
if (docValues != null)
{
docValues.SetDocument(i);
}
for (int j = 0; j < stringValues.Length; j++)
{
if (Debugging.AssertsEnabled) Debugging.Assert(docValues != null);
long ord = docValues.NextOrd();
if (Debugging.AssertsEnabled) Debugging.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
docValues.LookupOrd(ord, scratch);
Assert.AreEqual(stringValues[j], scratch.Utf8ToString());
}
if (Debugging.AssertsEnabled) Debugging.Assert(docValues == null || docValues.NextOrd() == SortedSetDocValues.NO_MORE_ORDS);
}
}
}
[Test]
public virtual void TestSortedSetFixedLengthVsStoredFields()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
int fixedLength = TestUtil.NextInt32(Random, 1, 10);
DoTestSortedSetVsStoredFields(fixedLength, fixedLength, 16);
}
}
[Test]
public virtual void TestSortedSetVariableLengthVsStoredFields()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestSortedSetVsStoredFields(1, 10, 16);
}
}
[Test]
public virtual void TestSortedSetFixedLengthSingleValuedVsStoredFields()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
int fixedLength = TestUtil.NextInt32(Random, 1, 10);
DoTestSortedSetVsStoredFields(fixedLength, fixedLength, 1);
}
}
[Test]
public virtual void TestSortedSetVariableLengthSingleValuedVsStoredFields()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestSortedSetVsStoredFields(1, 10, 1);
}
}
private static void AssertEquals(IBits expected, IBits actual) // LUCENENET: CA1822: Mark members as static
{
Assert.AreEqual(expected.Length, actual.Length);
for (int i = 0; i < expected.Length; i++)
{
Assert.AreEqual(expected.Get(i), actual.Get(i));
}
}
private static void AssertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) // LUCENENET: CA1822: Mark members as static
{
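// wrap each single-valued instance as a one-ord-per-doc SortedSet so the
// SortedSet comparison below covers the SORTED case as well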
AssertEquals(maxDoc, new SingletonSortedSetDocValues(expected), new SingletonSortedSetDocValues(actual));
}
private static void AssertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) // LUCENENET: CA1822: Mark members as static
{
// actual can be null for the segment if no docs actually had any doc values;
// in that case FieldCache.GetDocTermOrds returns EMPTY for the expected side
if (actual == null)
{
Assert.AreEqual(DocValues.EMPTY_SORTED_SET, expected);
return;
}
Assert.AreEqual(expected.ValueCount, actual.ValueCount);
// compare ord lists
for (int i = 0; i < maxDoc; i++)
{
expected.SetDocument(i);
actual.SetDocument(i);
long expectedOrd;
while ((expectedOrd = expected.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
{
Assert.AreEqual(expectedOrd, actual.NextOrd());
}
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, actual.NextOrd());
}
// compare ord dictionary
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
for (long i = 0; i < expected.ValueCount; i++)
{
expected.LookupOrd(i, expectedBytes);
actual.LookupOrd(i, actualBytes);
Assert.AreEqual(expectedBytes, actualBytes);
}
// compare termsenum
AssertEquals(expected.ValueCount, expected.GetTermsEnum(), actual.GetTermsEnum());
}
private static void AssertEquals(long numOrds, TermsEnum expected, TermsEnum actual) // LUCENENET: CA1822: Mark members as static
{
// sequential MoveNext() through all terms
while (expected.MoveNext())
{
Assert.IsTrue(actual.MoveNext());
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
Assert.IsFalse(actual.MoveNext());
// sequential SeekExact(ord) through all terms
for (long i = 0; i < numOrds; i++)
{
expected.SeekExact(i);
actual.SeekExact(i);
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
// sequential SeekExact(BytesRef) through all terms
for (long i = 0; i < numOrds; i++)
{
expected.SeekExact(i);
Assert.IsTrue(actual.SeekExact(expected.Term));
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
// sequential SeekCeil(BytesRef) through all terms
for (long i = 0; i < numOrds; i++)
{
expected.SeekExact(i);
Assert.AreEqual(SeekStatus.FOUND, actual.SeekCeil(expected.Term));
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
// random SeekExact(ord)
for (long i = 0; i < numOrds; i++)
{
long randomOrd = TestUtil.NextInt64(Random, 0, numOrds - 1);
expected.SeekExact(randomOrd);
actual.SeekExact(randomOrd);
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
// random SeekExact(BytesRef)
for (long i = 0; i < numOrds; i++)
{
long randomOrd = TestUtil.NextInt64(Random, 0, numOrds - 1);
expected.SeekExact(randomOrd);
actual.SeekExact(expected.Term);
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
// random SeekCeil(BytesRef)
for (long i = 0; i < numOrds; i++)
{
BytesRef target = new BytesRef(TestUtil.RandomUnicodeString(Random));
SeekStatus expectedStatus = expected.SeekCeil(target);
Assert.AreEqual(expectedStatus, actual.SeekCeil(target));
if (expectedStatus != SeekStatus.END)
{
Assert.AreEqual(expected.Ord, actual.Ord);
Assert.AreEqual(expected.Term, actual.Term);
}
}
}
private void DoTestSortedSetVsUninvertedField(int minLength, int maxLength)
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf);
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
Field idField = new StringField("id", Convert.ToString(i, CultureInfo.InvariantCulture), Field.Store.NO);
doc.Add(idField);
int length;
if (minLength == maxLength)
{
length = minLength; // fixed length
}
else
{
length = TestUtil.NextInt32(Random, minLength, maxLength);
}
int numValues = Random.Next(17);
// create a random list of strings
IList<string> values = new List<string>();
for (int v = 0; v < numValues; v++)
{
values.Add(TestUtil.RandomSimpleString(Random, length));
}
// add in any order to the indexed field
IList<string> unordered = new List<string>(values);
unordered.Shuffle(Random);
foreach (string v in unordered)
{
doc.Add(NewStringField("indexed", v, Field.Store.NO));
}
// add in any order to the dv field
IList<string> unordered2 = new List<string>(values);
unordered2.Shuffle(Random);
foreach (string v in unordered2)
{
doc.Add(new SortedSetDocValuesField("dv", new BytesRef(v)));
}
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
// compare per-segment
using (DirectoryReader ir = writer.GetReader())
{
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
SortedSetDocValues expected = FieldCache.DEFAULT.GetDocTermOrds(r, "indexed");
SortedSetDocValues actual = r.GetSortedSetDocValues("dv");
AssertEquals(r.MaxDoc, expected, actual);
}
} // ir.Dispose();
writer.ForceMerge(1);
// now compare again after the merge
using (DirectoryReader ir = writer.GetReader())
{
AtomicReader ar = GetOnlySegmentReader(ir);
SortedSetDocValues expected = FieldCache.DEFAULT.GetDocTermOrds(ar, "indexed");
SortedSetDocValues actual = ar.GetSortedSetDocValues("dv");
AssertEquals(ir.MaxDoc, expected, actual);
} // ir.Dispose();
}
[Test]
public virtual void TestSortedSetFixedLengthVsUninvertedField()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
int fixedLength = TestUtil.NextInt32(Random, 1, 10);
DoTestSortedSetVsUninvertedField(fixedLength, fixedLength);
}
}
[Test]
public virtual void TestSortedSetVariableLengthVsUninvertedField()
{
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
DoTestSortedSetVsUninvertedField(1, 10);
}
}
[Test]
public virtual void TestGCDCompression()
{
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
long min = -(((long)Random.Next(1 << 30)) << 32);
long mul = Random.Next() & 0xFFFFFFFFL;
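// every produced value is min + mul * k, so all deltas from min share the
// common divisor mul: exactly the pattern GCD compression should detect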
Int64Producer longs = new Int64ProducerAnonymousClass3(min, mul);
DoTestNumericsVsStoredFields(longs);
}
}
private class Int64ProducerAnonymousClass3 : Int64Producer
{
private readonly long min;
private readonly long mul;
public Int64ProducerAnonymousClass3(long min, long mul)
{
this.min = min;
this.mul = mul;
}
internal override long Next()
{
return min + mul * Random.Next(1 << 20);
}
}
[Test]
public virtual void TestZeros()
{
DoTestNumericsVsStoredFields(0, 0);
}
[Test]
public virtual void TestZeroOrMin()
{
// try to make GCD compression fail if the format did not anticipate that
// the GCD of 0 and MIN_VALUE is negative
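// (gcd(0, x) == x, and long.MinValue cannot be negated without overflowing,
// so a format that assumes a positive GCD would mis-encode these values)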
int numIterations = AtLeast(1);
for (int i = 0; i < numIterations; i++)
{
Int64Producer longs = new Int64ProducerAnonymousClass4();
DoTestNumericsVsStoredFields(longs);
}
}
private class Int64ProducerAnonymousClass4 : Int64Producer
{
internal override long Next()
{
return Random.NextBoolean() ? 0 : long.MinValue;
}
}
[Test]
public virtual void TestTwoNumbersOneMissing()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iw = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.YES));
doc.Add(new NumericDocValuesField("dv1", 0));
iw.AddDocument(doc);
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
iw.AddDocument(doc);
iw.ForceMerge(1);
} // iw.Dispose();
using IndexReader ir = DirectoryReader.Open(directory);
Assert.AreEqual(1, ir.Leaves.Count);
AtomicReader ar = (AtomicReader)ir.Leaves[0].Reader;
NumericDocValues dv = ar.GetNumericDocValues("dv1");
Assert.AreEqual(0L, dv.Get(0)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(0L, dv.Get(1)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
IBits docsWithField = ar.GetDocsWithField("dv1");
Assert.IsTrue(docsWithField.Get(0));
Assert.IsFalse(docsWithField.Get(1));
}
[Test]
public virtual void TestTwoNumbersOneMissingWithMerging()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iw = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.YES));
doc.Add(new NumericDocValuesField("dv1", 0));
iw.AddDocument(doc);
iw.Commit();
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
iw.AddDocument(doc);
iw.ForceMerge(1);
} // iw.Dispose();
using IndexReader ir = DirectoryReader.Open(directory);
Assert.AreEqual(1, ir.Leaves.Count);
AtomicReader ar = (AtomicReader)ir.Leaves[0].Reader;
NumericDocValues dv = ar.GetNumericDocValues("dv1");
Assert.AreEqual(0L, dv.Get(0)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(0L, dv.Get(1)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
IBits docsWithField = ar.GetDocsWithField("dv1");
Assert.IsTrue(docsWithField.Get(0));
Assert.IsFalse(docsWithField.Get(1));
}
[Test]
public virtual void TestThreeNumbersOneMissingWithMerging()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iw = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.YES));
doc.Add(new NumericDocValuesField("dv1", 0));
iw.AddDocument(doc);
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
iw.AddDocument(doc);
iw.Commit();
doc = new Document();
doc.Add(new StringField("id", "2", Field.Store.YES));
doc.Add(new NumericDocValuesField("dv1", 5));
iw.AddDocument(doc);
iw.ForceMerge(1);
} // iw.Dispose();
using IndexReader ir = DirectoryReader.Open(directory);
Assert.AreEqual(1, ir.Leaves.Count);
AtomicReader ar = (AtomicReader)ir.Leaves[0].Reader;
NumericDocValues dv = ar.GetNumericDocValues("dv1");
Assert.AreEqual(0L, dv.Get(0)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(0L, dv.Get(1)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
Assert.AreEqual(5L, dv.Get(2)); // LUCENENET specific - 5L required because types don't match (xUnit checks this)
IBits docsWithField = ar.GetDocsWithField("dv1");
Assert.IsTrue(docsWithField.Get(0));
Assert.IsFalse(docsWithField.Get(1));
Assert.IsTrue(docsWithField.Get(2));
}
[Test]
public virtual void TestTwoBytesOneMissing()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iw = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv1", new BytesRef()));
iw.AddDocument(doc);
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
iw.AddDocument(doc);
iw.ForceMerge(1);
} // iw.Dispose();
using IndexReader ir = DirectoryReader.Open(directory);
Assert.AreEqual(1, ir.Leaves.Count);
AtomicReader ar = (AtomicReader)ir.Leaves[0].Reader;
BinaryDocValues dv = ar.GetBinaryDocValues("dv1");
BytesRef @ref = new BytesRef();
dv.Get(0, @ref);
Assert.AreEqual(new BytesRef(), @ref);
dv.Get(1, @ref);
Assert.AreEqual(new BytesRef(), @ref);
IBits docsWithField = ar.GetDocsWithField("dv1");
Assert.IsTrue(docsWithField.Get(0));
Assert.IsFalse(docsWithField.Get(1));
}
[Test]
public virtual void TestTwoBytesOneMissingWithMerging()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iw = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv1", new BytesRef()));
iw.AddDocument(doc);
iw.Commit();
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
iw.AddDocument(doc);
iw.ForceMerge(1);
} // iw.Dispose();
using IndexReader ir = DirectoryReader.Open(directory);
Assert.AreEqual(1, ir.Leaves.Count);
AtomicReader ar = (AtomicReader)ir.Leaves[0].Reader;
BinaryDocValues dv = ar.GetBinaryDocValues("dv1");
BytesRef @ref = new BytesRef();
dv.Get(0, @ref);
Assert.AreEqual(new BytesRef(), @ref);
dv.Get(1, @ref);
Assert.AreEqual(new BytesRef(), @ref);
IBits docsWithField = ar.GetDocsWithField("dv1");
Assert.IsTrue(docsWithField.Get(0));
Assert.IsFalse(docsWithField.Get(1));
}
[Test]
public virtual void TestThreeBytesOneMissingWithMerging()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
using Directory directory = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.SetMergePolicy(NewLogMergePolicy());
using (RandomIndexWriter iw = new RandomIndexWriter(Random, directory, conf))
{
Document doc = new Document();
doc.Add(new StringField("id", "0", Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv1", new BytesRef()));
iw.AddDocument(doc);
doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
iw.AddDocument(doc);
iw.Commit();
doc = new Document();
doc.Add(new StringField("id", "2", Field.Store.YES));
doc.Add(new BinaryDocValuesField("dv1", new BytesRef("boo")));
iw.AddDocument(doc);
iw.ForceMerge(1);
} // iw.Dispose();
using IndexReader ir = DirectoryReader.Open(directory);
Assert.AreEqual(1, ir.Leaves.Count);
AtomicReader ar = (AtomicReader)ir.Leaves[0].Reader;
BinaryDocValues dv = ar.GetBinaryDocValues("dv1");
BytesRef @ref = new BytesRef();
dv.Get(0, @ref);
Assert.AreEqual(new BytesRef(), @ref);
dv.Get(1, @ref);
Assert.AreEqual(new BytesRef(), @ref);
dv.Get(2, @ref);
Assert.AreEqual(new BytesRef("boo"), @ref);
IBits docsWithField = ar.GetDocsWithField("dv1");
Assert.IsTrue(docsWithField.Get(0));
Assert.IsFalse(docsWithField.Get(1));
Assert.IsTrue(docsWithField.Get(2));
}
// LUCENE-4853
[Test]
public virtual void TestHugeBinaryValues()
{
Analyzer analyzer = new MockAnalyzer(Random);
// FSDirectory because SimpleText will consume gobs of
// space when storing big binary values:
Directory d = NewFSDirectory(CreateTempDir("hugeBinaryValues"));
bool directoryDisposed = false;
try
{
bool doFixed = Random.NextBoolean();
int numDocs;
int fixedLength = 0;
if (doFixed)
{
// Sometimes make all values fixed length since some
// codecs have different code paths for this:
numDocs = TestUtil.NextInt32(Random, 10, 20);
fixedLength = TestUtil.NextInt32(Random, 65537, 256 * 1024);
}
else
{
numDocs = TestUtil.NextInt32(Random, 100, 200);
}
var docBytes = new List<byte[]>();
DirectoryReader r = null;
try
{
using (IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)))
{
long totalBytes = 0;
for (int docID = 0; docID < numDocs; docID++)
{
// we don't use RandomIndexWriter because it might add
// more docvalues than we expect !!!!
// Must be > 64KB in size to ensure more than 2 pages in
// PagedBytes would be needed:
int numBytes;
if (doFixed)
{
numBytes = fixedLength;
}
else if (docID == 0 || Random.Next(5) == 3)
{
numBytes = TestUtil.NextInt32(Random, 65537, 3 * 1024 * 1024);
}
else
{
numBytes = TestUtil.NextInt32(Random, 1, 1024 * 1024);
}
totalBytes += numBytes;
if (totalBytes > 5 * 1024 * 1024)
{
break;
}
var bytes = new byte[numBytes];
Random.NextBytes(bytes);
docBytes.Add(bytes);
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
b.Length = bytes.Length;
doc.Add(new BinaryDocValuesField("field", b));
doc.Add(new StringField("id", "" + docID, Field.Store.YES));
try
{
w.AddDocument(doc);
}
catch (ArgumentException iae)
{
if (iae.Message.IndexOf("is too large", StringComparison.Ordinal) == -1)
{
throw /*iae*/; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
}
else
{
// OK: some codecs can't handle binary DV > 32K
Assert.IsFalse(CodecAcceptsHugeBinaryValues("field"));
w.Rollback();
d.Dispose();
directoryDisposed = true; // LUCENENET specific
return;
}
}
}
//DirectoryReader r; // LUCENENET: declaration moved outside w's using block
try
{
r = w.GetReader();
}
catch (ArgumentException iae)
{
if (iae.Message.IndexOf("is too large", StringComparison.Ordinal) == -1)
{
throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
}
else
{
Assert.IsFalse(CodecAcceptsHugeBinaryValues("field"));
// OK: some codecs can't handle binary DV > 32K
w.Rollback();
d.Dispose();
directoryDisposed = true; // LUCENENET specific
return;
}
}
} // w.Dispose();
using (AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r))
{
BytesRef bytes = new BytesRef(); // LUCENENET: Moved outside of the loop for performance
BinaryDocValues s = FieldCache.DEFAULT.GetTerms(ar, "field", false);
for (int docID = 0; docID < docBytes.Count; docID++)
{
Document doc = ar.Document(docID);
s.Get(docID, bytes);
var expected = docBytes[Convert.ToInt32(doc.Get("id"), CultureInfo.InvariantCulture)];
Assert.AreEqual(expected.Length, bytes.Length);
Assert.AreEqual(new BytesRef(expected), bytes);
}
Assert.IsTrue(CodecAcceptsHugeBinaryValues("field"));
} // ar.Dispose();
}
finally
{
r?.Dispose(); // LUCENENET specific - small chance w.Dispose() will throw, this is just here to cover that case. It is safe to call r.Dispose() more than once.
}
}
finally
{
// LUCENENET: MMapDirectory is not safe to dispose twice (possibly a bug), so make
// sure we skip this if another code path has already disposed the directory.
if (!directoryDisposed)
d.Dispose();
}
}
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
[Test]
public virtual void TestHugeBinaryValueLimit()
{
// We only test DVFormats that have a limit
AssumeFalse("test requires codec with limits on max binary field length", CodecAcceptsHugeBinaryValues("field"));
Analyzer analyzer = new MockAnalyzer(Random);
// FSDirectory because SimpleText will consume gobs of
// space when storing big binary values:
using Directory d = NewFSDirectory(CreateTempDir("hugeBinaryValues"));
bool doFixed = Random.NextBoolean();
int numDocs;
int fixedLength = 0;
if (doFixed)
{
// Sometimes make all values fixed length since some
// codecs have different code paths for this:
numDocs = TestUtil.NextInt32(Random, 10, 20);
#pragma warning disable 612, 618
fixedLength = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
#pragma warning restore 612, 618
}
else
{
numDocs = TestUtil.NextInt32(Random, 100, 200);
}
var docBytes = new List<byte[]>();
DirectoryReader r = null;
try
{
using (IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)))
{
long totalBytes = 0;
for (int docID = 0; docID < numDocs; docID++)
{
// we don't use RandomIndexWriter because it might add
// more docvalues than we expect !!!!
// Must be > 64KB in size to ensure more than 2 pages in
// PagedBytes would be needed:
int numBytes;
if (doFixed)
{
numBytes = fixedLength;
}
else if (docID == 0 || Random.Next(5) == 3)
{
#pragma warning disable 612, 618
numBytes = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
}
else
{
numBytes = TestUtil.NextInt32(Random, 1, Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
#pragma warning restore 612, 618
}
totalBytes += numBytes;
if (totalBytes > 5 * 1024 * 1024)
{
break;
}
var bytes = new byte[numBytes];
Random.NextBytes(bytes);
docBytes.Add(bytes);
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
b.Length = bytes.Length;
doc.Add(new BinaryDocValuesField("field", b));
doc.Add(new StringField("id", "" + docID, Field.Store.YES));
w.AddDocument(doc);
}
r = w.GetReader();
} // w.Dispose();
using (AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r))
{
BytesRef bytes = new BytesRef(); // LUCENENET: Moved outside of the loop for performance
BinaryDocValues s = FieldCache.DEFAULT.GetTerms(ar, "field", false);
for (int docID = 0; docID < docBytes.Count; docID++)
{
Document doc = ar.Document(docID);
s.Get(docID, bytes);
var expected = docBytes[Convert.ToInt32(doc.Get("id"), CultureInfo.InvariantCulture)];
Assert.AreEqual(expected.Length, bytes.Length);
Assert.AreEqual(new BytesRef(expected), bytes);
}
} // ar.Dispose();
}
finally
{
r?.Dispose(); // LUCENENET specific - small chance w.Dispose() will throw, this is just here to cover that case. It is safe to call r.Dispose() more than once.
}
}
/// <summary>
/// Tests dv against stored fields with threads (binary/numeric/sorted, no missing)
/// </summary>
[Test]
public virtual void TestThreads()
{
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", Arrays.Empty<byte>());
Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
doc.Add(idField);
doc.Add(storedBinField);
doc.Add(dvBinField);
doc.Add(dvSortedField);
doc.Add(storedNumericField);
doc.Add(dvNumericField);
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
int length = TestUtil.NextInt32(Random, 0, 8);
var buffer = new byte[length];
Random.NextBytes(buffer);
storedBinField.SetBytesValue(buffer);
dvBinField.SetBytesValue(buffer);
dvSortedField.SetBytesValue(buffer);
long numericValue = Random.NextInt64();
storedNumericField.SetStringValue(Convert.ToString(numericValue, CultureInfo.InvariantCulture));
dvNumericField.SetInt64Value(numericValue);
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
int numThreads = TestUtil.NextInt32(Random, 2, 7);
ThreadJob[] threads = new ThreadJob[numThreads];
using CountdownEvent startingGun = new CountdownEvent(1);
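// every thread blocks on startingGun until the main thread signals, so all
// readers start hitting the same DirectoryReader at (nearly) the same instant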
for (int i = 0; i < threads.Length; i++)
{
threads[i] = new ThreadAnonymousClass(ir, startingGun);
threads[i].Start();
}
startingGun.Signal();
foreach (ThreadJob t in threads)
{
t.Join();
}
}
private class ThreadAnonymousClass : ThreadJob
{
private readonly DirectoryReader ir;
private readonly CountdownEvent startingGun;
public ThreadAnonymousClass(DirectoryReader ir, CountdownEvent startingGun)
{
this.ir = ir;
this.startingGun = startingGun;
}
public override void Run()
{
try
{
startingGun.Wait();
BytesRef scratch = new BytesRef(); // LUCENENET: Moved outside of the loop for performance
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
BinaryDocValues binaries = r.GetBinaryDocValues("dvBin");
SortedDocValues sorted = r.GetSortedDocValues("dvSorted");
NumericDocValues numerics = r.GetNumericDocValues("dvNum");
for (int j = 0; j < r.MaxDoc; j++)
{
BytesRef binaryValue = r.Document(j).GetBinaryValue("storedBin");
binaries.Get(j, scratch);
Assert.AreEqual(binaryValue, scratch);
sorted.Get(j, scratch);
Assert.AreEqual(binaryValue, scratch);
string expected = r.Document(j).Get("storedNum");
Assert.AreEqual(Convert.ToInt64(expected, CultureInfo.InvariantCulture), numerics.Get(j));
}
}
TestUtil.CheckReader(ir);
}
catch (Exception e)
{
throw new Exception(e.ToString(), e);
}
}
}
/// <summary>
/// Tests dv against stored fields with threads (all types + missing)
/// </summary>
[Test]
public virtual void TestThreads2()
{
AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField);
AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
using Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir, conf))
{
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", Arrays.Empty<byte>());
Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
// index some docs
int numDocs = AtLeast(300);
for (int i = 0; i < numDocs; i++)
{
idField.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
int length = TestUtil.NextInt32(Random, 0, 8);
var buffer = new byte[length];
Random.NextBytes(buffer);
storedBinField.SetBytesValue(buffer);
dvBinField.SetBytesValue(buffer);
dvSortedField.SetBytesValue(buffer);
long numericValue = Random.NextInt64();
storedNumericField.SetStringValue(Convert.ToString(numericValue, CultureInfo.InvariantCulture));
dvNumericField.SetInt64Value(numericValue);
Document doc = new Document();
doc.Add(idField);
if (Random.Next(4) > 0)
{
doc.Add(storedBinField);
doc.Add(dvBinField);
doc.Add(dvSortedField);
}
if (Random.Next(4) > 0)
{
doc.Add(storedNumericField);
doc.Add(dvNumericField);
}
int numSortedSetFields = Random.Next(3);
// LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
JCG.SortedSet<string> values = new JCG.SortedSet<string>(StringComparer.Ordinal);
for (int j = 0; j < numSortedSetFields; j++)
{
values.Add(TestUtil.RandomSimpleString(Random));
}
foreach (string v in values)
{
doc.Add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
doc.Add(new StoredField("storedSortedSet", v));
}
writer.AddDocument(doc);
if (Random.Next(31) == 0)
{
writer.Commit();
}
}
// delete some docs
int numDeletions = Random.Next(numDocs / 10);
for (int i = 0; i < numDeletions; i++)
{
int id = Random.Next(numDocs);
writer.DeleteDocuments(new Term("id", Convert.ToString(id, CultureInfo.InvariantCulture)));
}
} // writer.Dispose();
// compare
using DirectoryReader ir = DirectoryReader.Open(dir);
int numThreads = TestUtil.NextInt32(Random, 2, 7);
ThreadJob[] threads = new ThreadJob[numThreads];
using CountdownEvent startingGun = new CountdownEvent(1);
for (int i = 0; i < threads.Length; i++)
{
threads[i] = new ThreadAnonymousClass2(ir, startingGun);
threads[i].Start();
}
startingGun.Signal();
foreach (ThreadJob t in threads)
{
t.Join();
}
}
private class ThreadAnonymousClass2 : ThreadJob
{
private readonly DirectoryReader ir;
private readonly CountdownEvent startingGun;
public ThreadAnonymousClass2(DirectoryReader ir, CountdownEvent startingGun)
{
this.ir = ir;
this.startingGun = startingGun;
}
public override void Run()
{
try
{
startingGun.Wait();
foreach (AtomicReaderContext context in ir.Leaves)
{
AtomicReader r = context.AtomicReader;
BinaryDocValues binaries = r.GetBinaryDocValues("dvBin");
IBits binaryBits = r.GetDocsWithField("dvBin");
SortedDocValues sorted = r.GetSortedDocValues("dvSorted");
IBits sortedBits = r.GetDocsWithField("dvSorted");
NumericDocValues numerics = r.GetNumericDocValues("dvNum");
IBits numericBits = r.GetDocsWithField("dvNum");
SortedSetDocValues sortedSet = r.GetSortedSetDocValues("dvSortedSet");
IBits sortedSetBits = r.GetDocsWithField("dvSortedSet");
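// each per-field producer is null when no doc in this segment has that field,
// so the checks below must tolerate a missing producer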
for (int j = 0; j < r.MaxDoc; j++)
{
BytesRef binaryValue = r.Document(j).GetBinaryValue("storedBin");
if (binaryValue != null)
{
if (binaries != null)
{
BytesRef scratch = new BytesRef();
binaries.Get(j, scratch);
Assert.AreEqual(binaryValue, scratch);
sorted.Get(j, scratch);
Assert.AreEqual(binaryValue, scratch);
Assert.IsTrue(binaryBits.Get(j));
Assert.IsTrue(sortedBits.Get(j));
}
}
else if (binaries != null)
{
Assert.IsFalse(binaryBits.Get(j));
Assert.IsFalse(sortedBits.Get(j));
Assert.AreEqual(-1, sorted.GetOrd(j));
}
string number = r.Document(j).Get("storedNum");
if (number != null)
{
if (numerics != null)
{
Assert.AreEqual(Convert.ToInt64(number, CultureInfo.InvariantCulture), numerics.Get(j));
}
}
else if (numerics != null)
{
Assert.IsFalse(numericBits.Get(j));
Assert.AreEqual(0L, numerics.Get(j)); // LUCENENET specific - 0L required because types don't match (xUnit checks this)
}
string[] values = r.Document(j).GetValues("storedSortedSet");
if (values.Length > 0)
{
Assert.IsNotNull(sortedSet);
sortedSet.SetDocument(j);
for (int k = 0; k < values.Length; k++)
{
long ord = sortedSet.NextOrd();
Assert.IsTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
BytesRef value = new BytesRef();
sortedSet.LookupOrd(ord, value);
Assert.AreEqual(values[k], value.Utf8ToString());
}
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
Assert.IsTrue(sortedSetBits.Get(j));
}
else if (sortedSet != null)
{
sortedSet.SetDocument(j);
Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
Assert.IsFalse(sortedSetBits.Get(j));
}
}
}
TestUtil.CheckReader(ir);
}
catch (Exception e)
{
throw new Exception(e.ToString(), e);
}
}
}
// LUCENE-5218
[Test]
public virtual void TestEmptyBinaryValueOnPageSizes()
{
// Test larger and larger power-of-two sized values,
// followed by empty string value:
for (int i = 0; i < 20; i++)
{
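// at i == 15 the value (1 << i bytes) already exceeds the ~32 KB per-value
// cap of limited codecs, so stop early when the codec rejects huge values: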
if (i > 14 && CodecAcceptsHugeBinaryValues("field") == false)
{
break;
}
using Directory dir = NewDirectory();
IndexReader r = null;
try
{
using (RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, dir))
{
BytesRef bytes = new BytesRef();
bytes.Bytes = new byte[1 << i];
bytes.Length = 1 << i;
for (int j = 0; j < 4; j++)
{
Document doc_ = new Document();
doc_.Add(new BinaryDocValuesField("field", bytes));
w.AddDocument(doc_);
}
Document doc = new Document();
doc.Add(new StoredField("id", "5"));
doc.Add(new BinaryDocValuesField("field", new BytesRef()));
w.AddDocument(doc);
r = w.GetReader();
} // w.Dispose();
using AtomicReader ar = SlowCompositeReaderWrapper.Wrap(r);
BinaryDocValues values = ar.GetBinaryDocValues("field");
BytesRef result = new BytesRef();
for (int j = 0; j < 5; j++)
{
values.Get(0, result);
Assert.IsTrue(result.Length == 0 || result.Length == 1 << i);
}
}
finally
{
r?.Dispose(); // LUCENENET specific - small chance w.Dispose() will throw, this is just here to cover that case. It is safe to call r.Dispose() more than once.
}
}
}
protected virtual bool CodecAcceptsHugeBinaryValues(string field)
{
return true;
}
}
}