package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.DerefBytesDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.LongDocValuesField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.ShortDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.index.IndexWriter; // javadoc
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
/** Silly class that randomizes the indexing experience. E.g.
 * it may swap in a different merge policy/scheduler; may
 * commit periodically; may or may not forceMerge at the end;
 * may flush by doc count instead of RAM, etc.
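 *
 * <p>A minimal usage sketch (it assumes the test already has a {@code Random random}
 * and a {@code Directory dir}, e.g. from LuceneTestCase; field setup is up to the test):
 * <pre>
 *   RandomIndexWriter riw = new RandomIndexWriter(random, dir);
 *   Document doc = new Document();
 *   // ... add whatever fields the test needs ...
 *   riw.addDocument(doc);
 *   DirectoryReader reader = riw.getReader(); // randomly NRT or a newly opened reader
 *   // ... run the test's assertions against reader ...
 *   reader.close();
 *   riw.close();
 * </pre>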
*/
public class RandomIndexWriter implements Closeable {
public IndexWriter w;
private final Random r;
int docCount;
int flushAt;
private double flushAtFactor = 1.0;
private boolean getReaderCalled;
private final int fixedBytesLength;
private final long docValuesFieldPrefix;
private volatile boolean doDocValues;
private final Codec codec; // sugar
  // Randomly calls Thread.yield so we mix up thread scheduling
private static final class MockIndexWriter extends IndexWriter {
private final Random r;
public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) throws IOException {
super(dir, conf);
// TODO: this should be solved in a different way; Random should not be shared (!).
this.r = new Random(r.nextLong());
}
@Override
boolean testPoint(String name) {
if (r.nextInt(4) == 2)
Thread.yield();
return true;
}
}
  /** Creates a RandomIndexWriter with a random config: uses TEST_VERSION_CURRENT and MockAnalyzer. */
public RandomIndexWriter(Random r, Directory dir) throws IOException {
this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r)));
}
  /** Creates a RandomIndexWriter with a random config: uses TEST_VERSION_CURRENT. */
public RandomIndexWriter(Random r, Directory dir, Analyzer a) throws IOException {
this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, a));
}
  /** Creates a RandomIndexWriter with a random config. */
public RandomIndexWriter(Random r, Directory dir, Version v, Analyzer a) throws IOException {
this(r, dir, LuceneTestCase.newIndexWriterConfig(r, v, a));
}
  /** Creates a RandomIndexWriter with the provided config. */
public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException {
// TODO: this should be solved in a different way; Random should not be shared (!).
this.r = new Random(r.nextLong());
w = new MockIndexWriter(r, dir, c);
flushAt = _TestUtil.nextInt(r, 10, 1000);
codec = w.getConfig().getCodec();
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW config=" + w.getConfig());
System.out.println("codec default=" + codec.getName());
}
    /* TODO: find some way to make this random...
     * This length must be consistent across all fixed-bytes
     * fields in one index, so if another writer is opened on
     * the same index it would change if we used r.nextInt(x).
     * Maybe we can peek at the existing files here?
     */
fixedBytesLength = 17;
// NOTE: this means up to 13 * 5 unique fields (we have
// 13 different DV types):
docValuesFieldPrefix = r.nextInt(5);
switchDoDocValues();
// Make sure we sometimes test indices that don't get
// any forced merges:
doRandomForceMerge = r.nextBoolean();
}
private void switchDoDocValues() {
// randomly enable / disable docValues
doDocValues = LuceneTestCase.rarely(r);
if (LuceneTestCase.VERBOSE) {
if (doDocValues) {
System.out.println("NOTE: RIW: turning on random DocValues fields");
}
}
}
/**
* Adds a Document.
* @see IndexWriter#addDocument(Iterable)
*/
public <T extends IndexableField> void addDocument(final Iterable<T> doc) throws IOException {
addDocument(doc, w.getAnalyzer());
}
public <T extends IndexableField> void addDocument(final Iterable<T> doc, Analyzer a) throws IOException {
if (doDocValues && doc instanceof Document) {
randomPerDocFieldValues((Document) doc);
}
if (r.nextInt(5) == 3) {
// TODO: maybe, we should simply buffer up added docs
// (but we need to clone them), and only when
// getReader, commit, etc. are called, we do an
// addDocuments? Would be better testing.
w.addDocuments(new Iterable<Iterable<T>>() {
@Override
public Iterator<Iterable<T>> iterator() {
return new Iterator<Iterable<T>>() {
boolean done;
@Override
public boolean hasNext() {
return !done;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public Iterable<T> next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
}, a);
} else {
w.addDocument(doc, a);
}
maybeCommit();
}
private BytesRef getFixedRandomBytes() {
final String randomUnicodeString = _TestUtil.randomFixedByteLengthUnicodeString(r, fixedBytesLength);
BytesRef fixedRef = new BytesRef(randomUnicodeString);
if (fixedRef.length > fixedBytesLength) {
fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength);
} else {
fixedRef.grow(fixedBytesLength);
fixedRef.length = fixedBytesLength;
}
return fixedRef;
}
private void randomPerDocFieldValues(Document doc) {
DocValues.Type[] values = DocValues.Type.values();
DocValues.Type type = values[r.nextInt(values.length)];
    String name = "random_" + type.name() + docValuesFieldPrefix;
if (doc.getField(name) != null) {
return;
}
final Field f;
switch (type) {
case BYTES_FIXED_DEREF:
f = new DerefBytesDocValuesField(name, getFixedRandomBytes(), true);
break;
case BYTES_VAR_DEREF:
f = new DerefBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false);
break;
case BYTES_FIXED_STRAIGHT:
f = new StraightBytesDocValuesField(name, getFixedRandomBytes(), true);
break;
case BYTES_VAR_STRAIGHT:
f = new StraightBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false);
break;
case BYTES_FIXED_SORTED:
f = new SortedBytesDocValuesField(name, getFixedRandomBytes(), true);
break;
case BYTES_VAR_SORTED:
f = new SortedBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false);
break;
case FLOAT_32:
f = new FloatDocValuesField(name, r.nextFloat());
break;
case FLOAT_64:
f = new DoubleDocValuesField(name, r.nextDouble());
break;
case VAR_INTS:
f = new PackedLongDocValuesField(name, r.nextLong());
break;
case FIXED_INTS_16:
// TODO: we should test negatives too?
f = new ShortDocValuesField(name, (short) r.nextInt(Short.MAX_VALUE));
break;
case FIXED_INTS_32:
f = new IntDocValuesField(name, r.nextInt());
break;
case FIXED_INTS_64:
f = new LongDocValuesField(name, r.nextLong());
break;
case FIXED_INTS_8:
// TODO: we should test negatives too?
f = new ByteDocValuesField(name, (byte) r.nextInt(128));
break;
default:
throw new IllegalArgumentException("no such type: " + type);
}
doc.add(f);
}
private void maybeCommit() throws IOException {
if (docCount++ == flushAt) {
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
}
w.commit();
flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000));
if (flushAtFactor < 2e6) {
        // gradually but exponentially increase time between flushes
flushAtFactor *= 1.05;
}
switchDoDocValues();
}
}
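  /**
   * Adds a block of documents atomically.
   * @see IndexWriter#addDocuments(Iterable)
   */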
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
w.addDocuments(docs);
maybeCommit();
}
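  /**
   * Atomically deletes documents matching the provided delete term and
   * adds a block of documents.
   * @see IndexWriter#updateDocuments(Term, Iterable)
   */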
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
w.updateDocuments(delTerm, docs);
maybeCommit();
}
/**
* Updates a document.
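   * <p>For example (a sketch; {@code riw}, {@code newDoc} and the unique
   * {@code id} field are the test's own, not part of this API):
   * <pre>
   *   riw.updateDocument(new Term("id", "17"), newDoc);
   * </pre>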
* @see IndexWriter#updateDocument(Term, Iterable)
*/
public <T extends IndexableField> void updateDocument(Term t, final Iterable<T> doc) throws IOException {
if (doDocValues) {
randomPerDocFieldValues((Document) doc);
}
if (r.nextInt(5) == 3) {
w.updateDocuments(t, new Iterable<Iterable<T>>() {
@Override
public Iterator<Iterable<T>> iterator() {
return new Iterator<Iterable<T>>() {
boolean done;
@Override
public boolean hasNext() {
return !done;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public Iterable<T> next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
});
} else {
w.updateDocument(t, doc);
}
maybeCommit();
}
public void addIndexes(Directory... dirs) throws IOException {
w.addIndexes(dirs);
}
public void addIndexes(IndexReader... readers) throws IOException {
w.addIndexes(readers);
}
public void deleteDocuments(Term term) throws IOException {
w.deleteDocuments(term);
}
public void deleteDocuments(Query q) throws IOException {
w.deleteDocuments(q);
}
public void commit() throws IOException {
w.commit();
switchDoDocValues();
}
public int numDocs() {
return w.numDocs();
}
public int maxDoc() {
return w.maxDoc();
}
public void deleteAll() throws IOException {
w.deleteAll();
}
public DirectoryReader getReader() throws IOException {
return getReader(true);
}
private boolean doRandomForceMerge = true;
private boolean doRandomForceMergeAssert = true;
public void forceMergeDeletes(boolean doWait) throws IOException {
w.forceMergeDeletes(doWait);
}
public void forceMergeDeletes() throws IOException {
w.forceMergeDeletes();
}
public void setDoRandomForceMerge(boolean v) {
doRandomForceMerge = v;
}
public void setDoRandomForceMergeAssert(boolean v) {
doRandomForceMergeAssert = v;
}
private void doRandomForceMerge() throws IOException {
if (doRandomForceMerge) {
final int segCount = w.getSegmentCount();
if (r.nextBoolean() || segCount == 0) {
// full forceMerge
w.forceMerge(1);
} else {
// partial forceMerge
final int limit = _TestUtil.nextInt(r, 1, segCount);
w.forceMerge(limit);
assert !doRandomForceMergeAssert || w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
}
}
switchDoDocValues();
}
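  /**
   * Returns a reader over the index. Randomly uses either a near-real-time
   * reader or commits and opens a new reader, and may first do a random
   * forceMerge.
   */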
public DirectoryReader getReader(boolean applyDeletions) throws IOException {
getReaderCalled = true;
if (r.nextInt(20) == 2) {
doRandomForceMerge();
}
if (!applyDeletions || r.nextBoolean()) {
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.getReader: use NRT reader");
}
if (r.nextInt(5) == 1) {
w.commit();
}
return w.getReader(applyDeletions);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.getReader: open new reader");
}
w.commit();
switchDoDocValues();
if (r.nextBoolean()) {
return DirectoryReader.open(w.getDirectory(), _TestUtil.nextInt(r, 1, 10));
} else {
return w.getReader(applyDeletions);
}
}
}
/**
* Close this writer.
* @see IndexWriter#close()
*/
public void close() throws IOException {
    // If the test never used the getReader() API, sometimes forceMerge
    // anyway, since presumably a reader may be opened directly on the dir.
if (getReaderCalled == false && r.nextInt(8) == 2) {
doRandomForceMerge();
}
w.close();
}
/**
* Forces a forceMerge.
* <p>
* NOTE: this should be avoided in tests unless absolutely necessary,
* as it will result in less test coverage.
* @see IndexWriter#forceMerge(int)
*/
public void forceMerge(int maxSegmentCount) throws IOException {
w.forceMerge(maxSegmentCount);
}
}