using J2N.Threading;
using J2N.Threading.Atomic;
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Randomized.Generators;
using Lucene.Net.Support;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using Assert = Lucene.Net.TestFramework.Assert;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using BinaryDocValuesField = BinaryDocValuesField;
using BytesRef = Lucene.Net.Util.BytesRef;
using Directory = Lucene.Net.Store.Directory;
using Document = Documents.Document;
using IBits = Lucene.Net.Util.IBits;
using IOUtils = Lucene.Net.Util.IOUtils;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using NumericDocValuesField = NumericDocValuesField;
using Store = Field.Store;
using StringField = StringField;
using TestUtil = Lucene.Net.Util.TestUtil;
[SuppressCodecs("Appending", "Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45")]
[TestFixture]
public class TestMixedDocValuesUpdates : LuceneTestCase
{
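// Runs many small rounds of adds, doc-values updates/unsets and deletes,
// reopening the reader after every round and verifying that each live doc
// shows the latest value (or no value) for every field.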
[Test]
public virtual void TestManyReopensAndFields()
{
Directory dir = NewDirectory();
Random random = Random;
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
LogMergePolicy lmp = NewLogMergePolicy();
lmp.MergeFactor = 3; // merge often
conf.SetMergePolicy(lmp);
IndexWriter writer = new IndexWriter(dir, conf);
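// randomly exercise NRT reopens vs. regular reopens after a commit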
bool isNRT = random.NextBoolean();
DirectoryReader reader;
if (isNRT)
{
reader = DirectoryReader.Open(writer, true);
}
else
{
writer.Commit();
reader = DirectoryReader.Open(dir);
}
int numFields = random.Next(4) + 3; // 3-6
int numNDVFields = random.Next(numFields / 2) + 1; // 1-3
long[] fieldValues = new long[numFields];
bool[] fieldHasValue = new bool[numFields];
Arrays.Fill(fieldHasValue, true);
for (int i = 0; i < fieldValues.Length; i++)
{
fieldValues[i] = 1;
}
int numRounds = AtLeast(15);
int docID = 0;
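// each round: add docs, propagate unsets to the new docs, randomly update or
// unset one field, maybe delete a doc, then reopen the reader and verify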
for (int i = 0; i < numRounds; i++)
{
int numDocs = AtLeast(5);
// System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
for (int j = 0; j < numDocs; j++)
{
Document doc = new Document();
doc.Add(new StringField("id", "doc-" + docID, Store.NO));
doc.Add(new StringField("key", "all", Store.NO)); // update key
// add all fields with their current value
for (int f = 0; f < fieldValues.Length; f++)
{
if (f < numNDVFields)
{
doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
}
else
{
doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
}
}
writer.AddDocument(doc);
++docID;
}
// if a field's value was unset before, unset it on the newly added documents too
for (int field = 0; field < fieldHasValue.Length; field++)
{
if (!fieldHasValue[field])
{
if (field < numNDVFields)
{
writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
}
else
{
writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
}
}
}
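// randomly pick one field and either unset it or bump it to a new value on all docs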
int fieldIdx = random.Next(fieldValues.Length);
string updateField = "f" + fieldIdx;
if (random.NextBoolean())
{
// System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
fieldHasValue[fieldIdx] = false;
if (fieldIdx < numNDVFields)
{
writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
}
else
{
writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
}
}
else
{
fieldHasValue[fieldIdx] = true;
if (fieldIdx < numNDVFields)
{
writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
}
else
{
writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
}
// System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
}
if (random.NextDouble() < 0.2)
{
int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
// System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
}
// verify reader
if (!isNRT)
{
writer.Commit();
}
// System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
Assert.IsNotNull(newReader);
reader.Dispose();
reader = newReader;
// System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
BytesRef scratch = new BytesRef();
foreach (AtomicReaderContext context in reader.Leaves)
{
AtomicReader r = context.AtomicReader;
// System.out.println(((SegmentReader) r).getSegmentName());
IBits liveDocs = r.LiveDocs;
for (int field = 0; field < fieldValues.Length; field++)
{
string f = "f" + field;
BinaryDocValues bdv = r.GetBinaryDocValues(f);
NumericDocValues ndv = r.GetNumericDocValues(f);
IBits docsWithField = r.GetDocsWithField(f);
if (field < numNDVFields)
{
Assert.IsNotNull(ndv);
Assert.IsNull(bdv);
}
else
{
Assert.IsNull(ndv);
Assert.IsNotNull(bdv);
}
int maxDoc = r.MaxDoc;
for (int doc = 0; doc < maxDoc; doc++)
{
if (liveDocs == null || liveDocs.Get(doc))
{
// System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
if (fieldHasValue[field])
{
Assert.IsTrue(docsWithField.Get(doc));
if (field < numNDVFields)
{
Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
}
else
{
Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
}
}
else
{
Assert.IsFalse(docsWithField.Get(doc));
}
}
}
}
}
// System.out.println();
}
IOUtils.Dispose(writer, reader, dir);
}
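
// Stress test: multiple threads concurrently update doc values. Each thread
// owns one binary field ("f" + i) and its numeric control field ("cf" + i),
// where the control value is always written as twice the binary value.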
[Test]
public virtual void TestStressMultiThreading()
{
Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
IndexWriter writer = new IndexWriter(dir, conf);
// create index
int numThreads = TestUtil.NextInt32(Random, 3, 6);
int numDocs = AtLeast(2000);
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
doc.Add(new StringField("id", "doc" + i, Store.NO));
double group = Random.NextDouble();
string g;
if (group < 0.1)
{
g = "g0";
}
else if (group < 0.5)
{
g = "g1";
}
else if (group < 0.8)
{
g = "g2";
}
else
{
g = "g3";
}
doc.Add(new StringField("updKey", g, Store.NO));
for (int j = 0; j < numThreads; j++)
{
long value = Random.Next();
doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
}
writer.AddDocument(doc);
}
CountdownEvent done = new CountdownEvent(numThreads);
AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100));
// each thread both updates its own field pair and reopens NRT readers
ThreadJob[] threads = new ThreadJob[numThreads];
for (int i = 0; i < threads.Length; i++)
{
string f = "f" + i;
string cf = "cf" + i;
threads[i] = new ThreadAnonymousClass(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
}
foreach (ThreadJob t in threads)
{
t.Start();
}
done.Wait();
writer.Dispose();
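// verify: wherever the binary field has a value, the control field must hold twice that value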
DirectoryReader reader = DirectoryReader.Open(dir);
BytesRef scratch = new BytesRef();
foreach (AtomicReaderContext context in reader.Leaves)
{
AtomicReader r = context.AtomicReader;
for (int i = 0; i < numThreads; i++)
{
BinaryDocValues bdv = r.GetBinaryDocValues("f" + i);
NumericDocValues control = r.GetNumericDocValues("cf" + i);
IBits docsWithBdv = r.GetDocsWithField("f" + i);
IBits docsWithControl = r.GetDocsWithField("cf" + i);
IBits liveDocs = r.LiveDocs;
for (int j = 0; j < r.MaxDoc; j++)
{
if (liveDocs == null || liveDocs.Get(j))
{
Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
if (docsWithBdv.Get(j))
{
long ctrlValue = control.Get(j);
long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
// if (ctrlValue != bdvValue) {
// System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
// }
Assert.AreEqual(ctrlValue, bdvValue);
}
}
}
}
}
reader.Dispose();
dir.Dispose();
}
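
// Worker thread for TestStressMultiThreading: applies random updates, unsets,
// deletes, commits and NRT reopens until the shared update budget is exhausted.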
private class ThreadAnonymousClass : ThreadJob
{
private readonly TestMixedDocValuesUpdates outerInstance;
private readonly IndexWriter writer;
private readonly int numDocs;
private readonly CountdownEvent done;
private readonly AtomicInt32 numUpdates;
private readonly string f;
private readonly string cf;
public ThreadAnonymousClass(TestMixedDocValuesUpdates outerInstance, string str, IndexWriter writer, int numDocs, CountdownEvent done, AtomicInt32 numUpdates, string f, string cf)
: base(str)
{
this.outerInstance = outerInstance;
this.writer = writer;
this.numDocs = numDocs;
this.done = done;
this.numUpdates = numUpdates;
this.f = f;
this.cf = cf;
}
public override void Run()
{
DirectoryReader reader = null;
bool success = false;
try
{
Random random = Random;
while (numUpdates.GetAndDecrement() > 0)
{
double group = random.NextDouble();
Term t;
if (group < 0.1)
{
t = new Term("updKey", "g0");
}
else if (group < 0.5)
{
t = new Term("updKey", "g1");
}
else if (group < 0.8)
{
t = new Term("updKey", "g2");
}
else
{
t = new Term("updKey", "g3");
}
// System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t);
if (random.NextBoolean()) // sometimes unset a value
{
// System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=UNSET");
writer.UpdateBinaryDocValue(t, f, null);
writer.UpdateNumericDocValue(t, cf, null);
}
else
{
long updValue = random.Next();
// System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue);
writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue));
writer.UpdateNumericDocValue(t, cf, updValue * 2);
}
if (random.NextDouble() < 0.2)
{
// delete a random document
int doc = random.Next(numDocs);
// System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
writer.DeleteDocuments(new Term("id", "doc" + doc));
}
if (random.NextDouble() < 0.05) // commit every 20 updates on average
{
// System.out.println("[" + Thread.currentThread().getName() + "] commit");
writer.Commit();
}
if (random.NextDouble() < 0.1) // reopen NRT reader (apply updates), on average once every 10 updates
{
if (reader == null)
{
// System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
reader = DirectoryReader.Open(writer, true);
}
else
{
// System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader, writer, true);
if (r2 != null)
{
reader.Dispose();
reader = r2;
}
}
}
}
// System.out.println("[" + Thread.currentThread().getName() + "] DONE");
success = true;
}
catch (IOException e)
{
throw new Exception(e.ToString(), e);
}
finally
{
if (reader != null)
{
try
{
reader.Dispose();
}
catch (IOException e)
{
if (success) // suppress this exception only if there was another exception
{
throw new Exception(e.ToString(), e);
}
}
}
done.Signal();
}
}
}
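
// Updates a random document in each iteration, producing a new doc-values
// generation per NRT reopen, and verifies f/cf consistency on all segments.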
[Test]
public virtual void TestUpdateDifferentDocsInDifferentGens()
{
// update random documents (possibly the same one more than once) across multiple doc-values generations
Directory dir = NewDirectory();
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
conf.SetMaxBufferedDocs(4);
IndexWriter writer = new IndexWriter(dir, conf);
int numDocs = AtLeast(10);
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
doc.Add(new StringField("id", "doc" + i, Store.NO));
long value = Random.Next();
doc.Add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.ToBytes(value)));
doc.Add(new NumericDocValuesField("cf", value * 2));
writer.AddDocument(doc);
}
int numGens = AtLeast(5);
BytesRef scratch = new BytesRef();
for (int i = 0; i < numGens; i++)
{
int doc = Random.Next(numDocs);
Term t = new Term("id", "doc" + doc);
long value = Random.NextInt64();
writer.UpdateBinaryDocValue(t, "f", TestBinaryDocValuesUpdates.ToBytes(value));
writer.UpdateNumericDocValue(t, "cf", value * 2);
DirectoryReader reader = DirectoryReader.Open(writer, true);
foreach (AtomicReaderContext context in reader.Leaves)
{
AtomicReader r = context.AtomicReader;
BinaryDocValues fbdv = r.GetBinaryDocValues("f");
NumericDocValues cfndv = r.GetNumericDocValues("cf");
for (int j = 0; j < r.MaxDoc; j++)
{
Assert.AreEqual(cfndv.Get(j), TestBinaryDocValuesUpdates.GetValue(fbdv, j, scratch) * 2);
}
}
reader.Dispose();
}
writer.Dispose();
dir.Dispose();
}
[Test]
[Slow]
public virtual void TestTonsOfUpdates()
{
// LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
Directory dir = NewDirectory();
Random random = Random;
IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
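// flushing is purely RAM-driven; the buffer is shrunk after the initial
// indexing (below) to force frequent flushes while updates are applied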
IndexWriter writer = new IndexWriter(dir, conf);
// test data: lots of documents (tens of thousands) and a modest set of update terms (10-100)
int numDocs = AtLeast(20000);
int numBinaryFields = AtLeast(5);
int numTerms = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
ISet<string> updateTerms = new JCG.HashSet<string>();
while (updateTerms.Count < numTerms)
{
updateTerms.Add(TestUtil.RandomSimpleString(random));
}
// System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);
// build a large index with many BDV fields and update terms
for (int i = 0; i < numDocs; i++)
{
Document doc = new Document();
int numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
for (int j = 0; j < numUpdateTerms; j++)
{
doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
}
for (int j = 0; j < numBinaryFields; j++)
{
long val = random.Next();
doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
doc.Add(new NumericDocValuesField("cf" + j, val * 2));
}
writer.AddDocument(doc);
}
writer.Commit(); // commit so there's something to apply to
// set to flush every 2048 bytes (approximately every 12 updates), so we get
// many flushes during binary updates
writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
int numUpdates = AtLeast(100);
// System.out.println("numUpdates=" + numUpdates);
for (int i = 0; i < numUpdates; i++)
{
int field = random.Next(numBinaryFields);
Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
long value = random.Next();
writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
}
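// disposing the writer commits and applies all remaining buffered doc-values updates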
writer.Dispose();
DirectoryReader reader = DirectoryReader.Open(dir);
BytesRef scratch = new BytesRef();
foreach (AtomicReaderContext context in reader.Leaves)
{
for (int i = 0; i < numBinaryFields; i++)
{
AtomicReader r = context.AtomicReader;
BinaryDocValues f = r.GetBinaryDocValues("f" + i);
NumericDocValues cf = r.GetNumericDocValues("cf" + i);
for (int j = 0; j < r.MaxDoc; j++)
{
Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
}
}
}
reader.Dispose();
dir.Dispose();
}
}
}