blob: 6a632af413bc04e737230160c763ad4487abe1e3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
/**
* Base class aiming at testing {@link StoredFieldsFormat stored fields formats}.
* To test a new format, all you need is to register a new {@link Codec} which
* uses it and extend this class and override {@link #getCodec()}.
* @lucene.experimental
*/
public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormatTestCase {
/** Adds up to two small random stored-only values under the field name "f". */
@Override
protected void addRandomFields(Document d) {
  int remaining = random().nextInt(3);
  while (remaining-- > 0) {
    d.add(new StoredField("f", TestUtil.randomSimpleString(random(), 100)));
  }
}
/**
 * Indexes a random number of documents, each with a random subset of stored fields
 * ("f0".."fN"), randomly deleting some docs along the way, then verifies that every
 * surviving document's stored fields round-trip exactly through the reader. The read
 * loop runs twice: once against the multi-segment index and once after forceMerge(1).
 */
public void testRandomStoredFields() throws IOException {
  Directory dir = newDirectory();
  Random rand = random();
  RandomIndexWriter w =
      new RandomIndexWriter(
          rand,
          dir,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setMaxBufferedDocs(TestUtil.nextInt(rand, 5, 20)));
  final int docCount = atLeast(200);
  final int fieldCount = TestUtil.nextInt(rand, 1, 5);
  final List<Integer> fieldIDs = new ArrayList<>();
  // stored + indexed but not tokenized, so each id is searchable as a single term
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  Field idField = newField("id", "", customType);
  for (int i = 0; i < fieldCount; i++) {
    fieldIDs.add(i);
  }
  // expected state: id -> the Document we indexed under that id
  final Map<String, Document> docs = new HashMap<>();
  if (VERBOSE) {
    System.out.println("TEST: build index docCount=" + docCount);
  }
  // stored-only field type for the random payload fields
  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  for (int i = 0; i < docCount; i++) {
    Document doc = new Document();
    doc.add(idField);
    final String id = "" + i;
    idField.setStringValue(id);
    docs.put(id, doc);
    if (VERBOSE) {
      System.out.println("TEST: add doc id=" + id);
    }
    for (int field : fieldIDs) {
      final String s;
      // ~75% chance each field is present; absent fields must read back as null
      if (rand.nextInt(4) != 3) {
        s = TestUtil.randomUnicodeString(rand, 1000);
        doc.add(newField("f" + field, s, customType2));
      } else {
        s = null;
      }
    }
    w.addDocument(doc);
    if (rand.nextInt(50) == 17) {
      // mixup binding of field name -> Number every so often
      Collections.shuffle(fieldIDs, random());
    }
    if (rand.nextInt(5) == 3 && i > 0) {
      final String delID = "" + rand.nextInt(i);
      if (VERBOSE) {
        System.out.println("TEST: delete doc id=" + delID);
      }
      w.deleteDocuments(new Term("id", delID));
      docs.remove(delID);
    }
  }
  if (VERBOSE) {
    System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
  }
  if (docs.size() > 0) {
    String[] idsList = docs.keySet().toArray(new String[docs.size()]);
    for (int x = 0; x < 2; x++) {
      DirectoryReader r = maybeWrapWithMergingReader(w.getReader());
      IndexSearcher s = newSearcher(r);
      if (VERBOSE) {
        System.out.println("TEST: cycle x=" + x + " r=" + r);
      }
      int num = atLeast(100);
      for (int iter = 0; iter < num; iter++) {
        String testID = idsList[rand.nextInt(idsList.length)];
        if (VERBOSE) {
          System.out.println("TEST: test id=" + testID);
        }
        TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
        assertEquals(1, hits.totalHits.value);
        Document doc = r.document(hits.scoreDocs[0].doc);
        Document docExp = docs.get(testID);
        for (int i = 0; i < fieldCount; i++) {
          // fixed: the failure message previously printed "f" + fieldCount instead
          // of the index of the field that actually mismatched
          assertEquals(
              "doc " + testID + ", field f" + i + " is wrong",
              docExp.get("f" + i),
              doc.get("f" + i));
        }
      }
      r.close();
      // second pass reads the fully merged index
      w.forceMerge(1);
    }
  }
  w.close();
  dir.close();
}
// LUCENE-1727: make sure doc fields are stored in order
/**
 * Verifies that stored fields come back in exactly the order they were added to
 * the document, including a repeated field name (zzz, aaa, zzz).
 */
public void testStoredFieldsOrder() throws Throwable {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  // stored-only field type
  FieldType customType = new FieldType();
  customType.setStored(true);
  doc.add(newField("zzz", "a b c", customType));
  doc.add(newField("aaa", "a b c", customType));
  doc.add(newField("zzz", "1 2 3", customType));
  w.addDocument(doc);
  IndexReader r = maybeWrapWithMergingReader(w.getReader());
  Document doc2 = r.document(0);
  Iterator<IndexableField> it = doc2.getFields().iterator();
  // fixed: assertEquals takes (expected, actual); the original passed them
  // reversed, which produces misleading failure messages
  assertTrue(it.hasNext());
  Field f = (Field) it.next();
  assertEquals("zzz", f.name());
  assertEquals("a b c", f.stringValue());
  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals("aaa", f.name());
  assertEquals("a b c", f.stringValue());
  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals("zzz", f.name());
  assertEquals("1 2 3", f.stringValue());
  assertFalse(it.hasNext());
  r.close();
  w.close();
  d.close();
}
// LUCENE-1219
/**
 * Verifies that a binary stored field created with an offset/length slice
 * round-trips as exactly that slice: the reader must return the 17 bytes
 * starting at offset 10 of the original array (first byte: 77 + 10 = 87).
 */
public void testBinaryFieldOffsetLength() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  byte[] b = new byte[50];
  for (int i = 0; i < 50; i++) {
    b[i] = (byte) (i + 77);
  }
  Document doc = new Document();
  Field f = new StoredField("binary", b, 10, 17);
  // before indexing, the field still references the full 50-byte backing array
  byte[] bx = f.binaryValue().bytes;
  assertNotNull(bx);
  assertEquals(50, bx.length);
  assertEquals(10, f.binaryValue().offset);
  assertEquals(17, f.binaryValue().length);
  doc.add(f);
  w.addDocument(doc);
  w.close();
  IndexReader ir = DirectoryReader.open(dir);
  Document doc2 = ir.document(0);
  IndexableField f2 = doc2.getField("binary");
  // after reading back, only the 17-byte slice must remain.
  // fixed: the original used assertEquals(17, b.length, 17), which resolves to
  // the (double, double, delta) overload and accepted any length from 0 to 34
  BytesRef read = f2.binaryValue();
  assertNotNull(read);
  assertEquals(17, read.length);
  assertEquals(87, read.bytes[read.offset]);
  ir.close();
  dir.close();
}
/**
 * Indexes numeric StoredFields of all four primitive numeric types and checks
 * that each value reads back through the stored-fields API with the expected
 * boxed value, using doc values to map (possibly merged/reordered) docIDs back
 * to the original doc.
 */
public void testNumericField() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(500);
// expected value and expected boxed type, indexed by original doc id
// NOTE(review): typeAnswers is recorded but never asserted below — confirm
// whether a type check was intended
final Number[] answers = new Number[numDocs];
final Class<?>[] typeAnswers = new Class<?>[numDocs];
for(int id=0;id<numDocs;id++) {
Document doc = new Document();
final Field nf;
final Number answer;
final Class<?> typeAnswer;
if (random().nextBoolean()) {
// float/double
if (random().nextBoolean()) {
final float f = random().nextFloat();
answer = Float.valueOf(f);
nf = new StoredField("nf", f);
typeAnswer = Float.class;
} else {
final double d = random().nextDouble();
answer = Double.valueOf(d);
nf = new StoredField("nf", d);
typeAnswer = Double.class;
}
} else {
// int/long
if (random().nextBoolean()) {
final int i = random().nextInt();
answer = Integer.valueOf(i);
nf = new StoredField("nf", i);
typeAnswer = Integer.class;
} else {
final long l = random().nextLong();
answer = Long.valueOf(l);
nf = new StoredField("nf", l);
typeAnswer = Long.class;
}
}
doc.add(nf);
answers[id] = answer;
typeAnswers[id] = typeAnswer;
// id is stored, indexed as a point, and kept in doc values so each leaf doc
// can be mapped back to its expected answer after merging
doc.add(new StoredField("id", id));
doc.add(new IntPoint("id", id));
doc.add(new NumericDocValuesField("id", id));
w.addDocument(doc);
}
final DirectoryReader r = maybeWrapWithMergingReader(w.getReader());
w.close();
assertEquals(numDocs, r.numDocs());
// walk every leaf, using the "id" doc values to look up the expected value
for(LeafReaderContext ctx : r.leaves()) {
final LeafReader sub = ctx.reader();
final NumericDocValues ids = DocValues.getNumeric(sub, "id");
for(int docID=0;docID<sub.numDocs();docID++) {
final Document doc = sub.document(docID);
final Field f = (Field) doc.getField("nf");
assertTrue("got f=" + f, f instanceof StoredField);
assertEquals(docID, ids.nextDoc());
assertEquals(answers[(int) ids.longValue()], f.numericValue());
}
}
r.close();
dir.close();
}
/**
 * Verifies that the "indexed" bit survives the stored-fields round-trip: a
 * stored-only field must read back with IndexOptions.NONE, while an indexed
 * StringField must report non-null (non-NONE) index options.
 */
public void testIndexedBit() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
// stored-only field type (not indexed at all)
FieldType onlyStored = new FieldType();
onlyStored.setStored(true);
doc.add(new Field("field", "value", onlyStored));
doc.add(new StringField("field2", "value", Field.Store.YES));
w.addDocument(doc);
IndexReader r = maybeWrapWithMergingReader(w.getReader());
w.close();
assertEquals(IndexOptions.NONE, r.document(0).getField("field").fieldType().indexOptions());
assertNotNull(r.document(0).getField("field2").fieldType().indexOptions());
r.close();
dir.close();
}
/**
 * Indexes 100 identical docs carrying binary, string, and all four numeric
 * stored types, then reads a random doc back requesting one field at a time
 * (via the fieldsToLoad set), checking that single-field visiting returns the
 * right value while skipping the others.
 */
public void testReadSkip() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
// small buffer to force several flushes/segments
iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
// stored-only, frozen field type shared by the binary and string fields
FieldType ft = new FieldType();
ft.setStored(true);
ft.freeze();
final String string = TestUtil.randomSimpleString(random(), 50);
final byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
// small values half the time, to exercise different encodings
final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
final float f = random().nextFloat();
final double d = random().nextDouble();
List<Field> fields = Arrays.asList(
new Field("bytes", bytes, ft),
new Field("string", string, ft),
new StoredField("long", l),
new StoredField("int", i),
new StoredField("float", f),
new StoredField("double", d)
);
for (int k = 0; k < 100; ++k) {
Document doc = new Document();
for (Field fld : fields) {
doc.add(fld);
}
iw.w.addDocument(doc);
}
iw.commit();
final DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir));
final int docID = random().nextInt(100);
for (Field fld : fields) {
String fldName = fld.name();
// load only this one field
final Document sDoc = reader.document(docID, Collections.singleton(fldName));
final IndexableField sField = sDoc.getField(fldName);
if (Field.class.equals(fld.getClass())) {
// plain Field instances here are the binary/string stored fields
assertEquals(fld.binaryValue(), sField.binaryValue());
assertEquals(fld.stringValue(), sField.stringValue());
} else {
assertEquals(fld.numericValue(), sField.numericValue());
}
}
reader.close();
iw.close();
dir.close();
}
/** Checks that documents with no fields at all index and read back correctly. */
public void testEmptyDocs() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
// small buffer to force several flushes/segments
iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
// make sure that the fact that documents might be empty is not a problem
final Document emptyDoc = new Document();
final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
for (int i = 0; i < numDocs; ++i) {
iw.addDocument(emptyDoc);
}
iw.commit();
final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir));
for (int i = 0; i < numDocs; ++i) {
final Document doc = rd.document(i);
assertNotNull(doc);
// every stored document must come back non-null with zero fields
assertTrue(doc.getFields().isEmpty());
}
rd.close();
iw.close();
dir.close();
}
/**
 * Spawns several threads that concurrently search and load stored fields from
 * the same reader, verifying each lookup returns the expected value. Exceptions
 * from worker threads are captured in an AtomicReference and rethrown on the
 * main thread after join.
 */
public void testConcurrentReads() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
// small buffer to force several flushes/segments
iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
// make sure the readers are properly cloned
final Document doc = new Document();
final Field field = new StringField("fld", "", Store.YES);
doc.add(field);
final int numDocs = atLeast(1000);
for (int i = 0; i < numDocs; ++i) {
field.setStringValue("" + i);
iw.addDocument(doc);
}
iw.commit();
final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir));
final IndexSearcher searcher = new IndexSearcher(rd);
final int concurrentReads = atLeast(5);
final int readsPerThread = atLeast(50);
final List<Thread> readThreads = new ArrayList<>();
// first exception seen by any worker thread; rethrown below
final AtomicReference<Exception> ex = new AtomicReference<>();
for (int i = 0; i < concurrentReads; ++i) {
readThreads.add(new Thread() {
// per-thread list of doc ids to query, chosen up-front on the main thread
int[] queries;
{
queries = new int[readsPerThread];
for (int i = 0; i < queries.length; ++i) {
queries[i] = random().nextInt(numDocs);
}
}
@Override
public void run() {
for (int q : queries) {
final Query query = new TermQuery(new Term("fld", "" + q));
try {
final TopDocs topDocs = searcher.search(query, 1);
if (topDocs.totalHits.value != 1) {
throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits.value);
}
final Document sdoc = rd.document(topDocs.scoreDocs[0].doc);
if (sdoc == null || sdoc.get("fld") == null) {
throw new IllegalStateException("Could not find document " + q);
}
if (!Integer.toString(q).equals(sdoc.get("fld"))) {
throw new IllegalStateException("Expected " + q + ", but got " + sdoc.get("fld"));
}
} catch (Exception e) {
// record only the first failure; later ones are dropped
ex.compareAndSet(null, e);
}
}
}
});
}
for (Thread thread : readThreads) {
thread.start();
}
for (Thread thread : readThreads) {
thread.join();
}
rd.close();
if (ex.get() != null) {
throw ex.get();
}
iw.close();
dir.close();
}
/** Returns a new array of {@code length} bytes, each drawn uniformly from [0, max). */
private byte[] randomByteArray(int length, int max) {
  final byte[] out = new byte[length];
  int idx = 0;
  while (idx < length) {
    out[idx++] = (byte) random().nextInt(max);
  }
  return out;
}
/**
 * Indexes random binary payloads while periodically reopening the writer with
 * an alternating codec, so that merges must cross codec boundaries (including a
 * non-compressing SimpleText codec). Applies random range deletions, force
 * merges, and then verifies every surviving doc's binary fields byte-for-byte.
 */
public void testWriteReadMerge() throws IOException {
// get another codec, other than the default: so we are merging segments across different codecs
final Codec otherCodec;
if ("SimpleText".equals(Codec.getDefault().getName())) {
otherCodec = TestUtil.getDefaultCodec();
} else {
otherCodec = new SimpleTextCodec();
}
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
final int docCount = atLeast(200);
// expected state: data[doc][field] = the raw bytes stored for that field
final byte[][][] data = new byte [docCount][][];
for (int i = 0; i < docCount; ++i) {
// rarely many fields / long values / high-entropy bytes, to hit edge cases
final int fieldCount = rarely()
? RandomNumbers.randomIntBetween(random(), 1, 500)
: RandomNumbers.randomIntBetween(random(), 1, 5);
data[i] = new byte[fieldCount][];
for (int j = 0; j < fieldCount; ++j) {
final int length = rarely()
? random().nextInt(1000)
: random().nextInt(10);
final int max = rarely() ? 256 : 2;
data[i][j] = randomByteArray(length, max);
}
}
// stored-only (index options stripped from StringField.TYPE_STORED)
final FieldType type = new FieldType(StringField.TYPE_STORED);
type.setIndexOptions(IndexOptions.NONE);
type.freeze();
IntPoint id = new IntPoint("id", 0);
StoredField idStored = new StoredField("id", 0);
for (int i = 0; i < data.length; ++i) {
Document doc = new Document();
doc.add(id);
doc.add(idStored);
id.setIntValue(i);
idStored.setIntValue(i);
for (int j = 0; j < data[i].length; ++j) {
Field f = new Field("bytes" + j, data[i][j], type);
doc.add(f);
}
iw.w.addDocument(doc);
// occasionally close the writer and reopen with the other codec, so later
// merges combine segments written by different codecs
if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
iw.w.close();
IndexWriterConfig iwConfNew = newIndexWriterConfig(new MockAnalyzer(random()));
// test merging against a non-compressing codec
if (iwConf.getCodec() == otherCodec) {
iwConfNew.setCodec(Codec.getDefault());
} else {
iwConfNew.setCodec(otherCodec);
}
iwConf = iwConfNew;
iw = new RandomIndexWriter(random(), dir, iwConf);
}
}
// random range deletions by id
for (int i = 0; i < 10; ++i) {
final int min = random().nextInt(data.length);
final int max = min + random().nextInt(20);
iw.deleteDocuments(IntPoint.newRangeQuery("id", min, max-1));
}
iw.forceMerge(2); // force merges with deletions
iw.commit();
final DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
assertTrue(ir.numDocs() > 0);
int numDocs = 0;
for (int i = 0; i < ir.maxDoc(); ++i) {
final Document doc = ir.document(i);
if (doc == null) {
continue;
}
++ numDocs;
// map back to the original doc via the stored id, then compare every field
final int docId = doc.getField("id").numericValue().intValue();
assertEquals(data[docId].length + 1, doc.getFields().size());
for (int j = 0; j < data[docId].length; ++j) {
final byte[] arr = data[docId][j];
final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
final byte[] arr2 = BytesRef.deepCopyOf(arr2Ref).bytes;
assertArrayEquals(arr, arr2);
}
}
assertTrue(ir.numDocs() <= numDocs);
ir.close();
iw.deleteAll();
iw.commit();
iw.forceMerge(1);
iw.close();
dir.close();
}
/**
 * A dummy filter reader that reverses the order of documents in stored fields:
 * a request for the stored fields of doc N is served from doc maxDoc()-1-N of
 * the wrapped reader. Used to exercise merging through a filter reader.
 */
private static class DummyFilterLeafReader extends FilterLeafReader {
public DummyFilterLeafReader(LeafReader in) {
super(in);
}
// serve stored fields from the mirrored docID
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
super.document(maxDoc() - 1 - docID, visitor);
}
// no cache helpers: this reader presents a different view of its delegate,
// so its data must not be cached under the delegate's keys
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
}
/**
 * DirectoryReader wrapper that applies {@link DummyFilterLeafReader} to every
 * leaf, i.e. reverses stored-fields doc order per segment.
 */
private static class DummyFilterDirectoryReader extends FilterDirectoryReader {
public DummyFilterDirectoryReader(DirectoryReader in) throws IOException {
super(in, new SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new DummyFilterLeafReader(reader);
}
});
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
return new DummyFilterDirectoryReader(in);
}
// no caching: the wrapped view must not be cached under the delegate's key
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
}
/**
 * Builds an index of docs with every stored type, wraps it in the doc-reversing
 * DummyFilterDirectoryReader, addIndexes it into a fresh index, and verifies
 * each doc's stored fields (matched via the stored "id") survive the merge
 * through the filter reader intact.
 */
public void testMergeFilterReader() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(200);
// small pool of realistic unicode strings reused across docs
final String[] stringValues = new String[10];
for (int i = 0; i < stringValues.length; ++i) {
stringValues[i] = RandomStrings.randomRealisticUnicodeOfLength(random(), 10);
}
Document[] docs = new Document[numDocs];
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new StringField("to_delete", random().nextBoolean() ? "yes" : "no", Store.NO));
doc.add(new StoredField("id", i));
doc.add(new StoredField("i", random().nextInt(50)));
doc.add(new StoredField("l", random().nextLong()));
doc.add(new StoredField("d", random().nextDouble()));
doc.add(new StoredField("f", random().nextFloat()));
doc.add(new StoredField("s", RandomPicks.randomFrom(random(), stringValues)));
doc.add(new StoredField("b", new BytesRef(RandomPicks.randomFrom(random(), stringValues))));
docs[i] = doc;
w.addDocument(doc);
}
// sometimes delete about half the docs so the merge has to honor liveDocs
if (random().nextBoolean()) {
w.deleteDocuments(new Term("to_delete", "yes"));
}
w.commit();
w.close();
DirectoryReader reader = new DummyFilterDirectoryReader(maybeWrapWithMergingReader(DirectoryReader.open(dir)));
// copy through the filter reader into a second directory
Directory dir2 = newDirectory();
w = new RandomIndexWriter(random(), dir2);
TestUtil.addIndexesSlowly(w.w, reader);
reader.close();
dir.close();
reader = maybeWrapWithMergingReader(w.getReader());
for (int i = 0; i < reader.maxDoc(); ++i) {
final Document doc = reader.document(i);
// look up the expected doc via the stored id (doc order was reversed)
final int id = doc.getField("id").numericValue().intValue();
final Document expected = docs[id];
assertEquals(expected.get("s"), doc.get("s"));
assertEquals(expected.getField("i").numericValue(), doc.getField("i").numericValue());
assertEquals(expected.getField("l").numericValue(), doc.getField("l").numericValue());
assertEquals(expected.getField("d").numericValue(), doc.getField("d").numericValue());
assertEquals(expected.getField("f").numericValue(), doc.getField("f").numericValue());
assertEquals(expected.getField("b").binaryValue(), doc.getField("b").binaryValue());
}
reader.close();
w.close();
TestUtil.checkIndex(dir2);
dir2.close();
}
/**
 * Stress-tests very large documents — one with hundreds of thousands of small
 * fields and one with a single multi-megabyte field — through indexing,
 * commits, and a forceMerge, then verifies each doc's "fld" values by id.
 * Nightly-only because of its size.
 */
@Nightly
public void testBigDocuments() throws IOException {
assumeWorkingMMapOnWindows();
// "big" as "much bigger than the chunk size"
// for this test we force a FS dir
// we can't just use newFSDirectory, because this test doesn't really index anything.
// so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
// disable throttling so the big writes don't slow the test to a crawl
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
}
final Document emptyDoc = new Document(); // emptyDoc
final Document bigDoc1 = new Document(); // lot of small fields
final Document bigDoc2 = new Document(); // 1 very big field
final Field idField = new StringField("id", "", Store.NO);
emptyDoc.add(idField);
bigDoc1.add(idField);
bigDoc2.add(idField);
// stored-only variant of StringField
final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
onlyStored.setIndexOptions(IndexOptions.NONE);
final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
final int numFields = RandomNumbers.randomIntBetween(random(), 500000, 1000000);
for (int i = 0; i < numFields; ++i) {
bigDoc1.add(smallField);
}
final Field bigField = new Field("fld", randomByteArray(RandomNumbers.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
bigDoc2.add(bigField);
final int numDocs = atLeast(5);
final Document[] docs = new Document[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
}
for (int i = 0; i < numDocs; ++i) {
idField.setStringValue("" + i);
iw.addDocument(docs[i]);
// occasional commits to create segment boundaries between big docs
if (random().nextInt(numDocs) == 0) {
iw.commit();
}
}
iw.commit();
iw.forceMerge(1); // look at what happens when big docs are merged
final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir));
final IndexSearcher searcher = new IndexSearcher(rd);
for (int i = 0; i < numDocs; ++i) {
final Query query = new TermQuery(new Term("id", "" + i));
final TopDocs topDocs = searcher.search(query, 1);
assertEquals("" + i, 1, topDocs.totalHits.value);
final Document doc = rd.document(topDocs.scoreDocs[0].doc);
assertNotNull(doc);
final IndexableField[] fieldValues = doc.getFields("fld");
assertEquals(docs[i].getFields("fld").length, fieldValues.length);
if (fieldValues.length > 0) {
assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
}
}
rd.close();
iw.close();
dir.close();
}
/**
 * Indexes with merging disabled, applies random deletes, then reopens with a
 * merging writer and force-merges — exercising the bulk-merge path over
 * segments that carry deletions. Final correctness is via checkIndex.
 */
public void testBulkMergeWithDeletes() throws IOException {
final int numDocs = atLeast(200);
Directory dir = newDirectory();
// NoMergePolicy: keep all the small flushed segments around until the explicit merge below
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new StoredField("f", TestUtil.randomSimpleString(random())));
w.addDocument(doc);
}
// delete a random subset (ids may repeat; repeats are no-ops)
final int deleteCount = TestUtil.nextInt(random(), 5, numDocs);
for (int i = 0; i < deleteCount; ++i) {
final int id = random().nextInt(numDocs);
w.deleteDocuments(new Term("id", Integer.toString(id)));
}
w.commit();
w.close();
// reopen with default merge policy and force the bulk merge
w = new RandomIndexWriter(random(), dir);
w.forceMerge(TestUtil.nextInt(random(), 1, 3));
w.commit();
w.close();
TestUtil.checkIndex(dir);
dir.close();
}
/**
 * Mixes up field numbers across ten source indexes (optionally scrambled via
 * MismatchedDirectoryReader), merges everything into one index, and checks the
 * stored data is still correct. Fields are named so that name == value
 * (e.g. "3" -> "3"), making any field-number mixup immediately detectable.
 */
public void testMismatchedFields() throws Exception {
Directory dirs[] = new Directory[10];
for (int i = 0; i < dirs.length; i++) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(null);
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
for (int j = 0; j < 10; j++) {
// add fields where name=value (e.g. 3=3) so we can detect if stuff gets screwed up.
doc.add(new StringField(Integer.toString(j), Integer.toString(j), Field.Store.YES));
}
for (int j = 0; j < 10; j++) {
iw.addDocument(doc);
}
DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(iw));
// mix up fields explicitly
if (random().nextBoolean()) {
reader = new MismatchedDirectoryReader(reader, random());
}
// copy into a fresh directory via addIndexesSlowly
dirs[i] = newDirectory();
IndexWriter adder = new IndexWriter(dirs[i], new IndexWriterConfig(null));
TestUtil.addIndexesSlowly(adder, reader);
adder.commit();
adder.close();
IOUtils.close(reader, iw, dir);
}
// merge all ten sources into one index and fully merge it
Directory everything = newDirectory();
IndexWriter iw = new IndexWriter(everything, new IndexWriterConfig(null));
iw.addIndexes(dirs);
iw.forceMerge(1);
LeafReader ir = getOnlyLeafReader(DirectoryReader.open(iw));
for (int i = 0; i < ir.maxDoc(); i++) {
Document doc = ir.document(i);
assertEquals(10, doc.getFields().size());
for (int j = 0; j < 10; j++) {
// name must still equal value; a mixup would pair the wrong name/value
assertEquals(Integer.toString(j), doc.get(Integer.toString(j)));
}
}
IOUtils.close(iw, ir, everything);
IOUtils.close(dirs);
}
/**
 * Indexes random stored documents into an index-sorted index, mixing in adds
 * via a foreign sorted index (addIndexes), random deletes, updates and force
 * merges, and periodically verifies that each tracked live document's stored
 * fields exactly match what was indexed for its id.
 */
public void testRandomStoredFieldsWithIndexSort() throws Exception {
  // sort on one or two numeric doc-values fields
  final SortField[] sortFields;
  if (random().nextBoolean()) {
    sortFields =
        new SortField[] {
          new SortField("sort-1", SortField.Type.LONG),
          new SortField("sort-2", SortField.Type.INT)
        };
  } else {
    sortFields = new SortField[] {new SortField("sort-1", SortField.Type.LONG)};
  }
  List<String> storedFields = new ArrayList<>();
  int numFields = TestUtil.nextInt(random(), 1, 10);
  for (int i = 0; i < numFields; i++) {
    storedFields.add("f-" + i);
  }
  // TYPE_STORED is already stored; the original's extra setStored(true) was a no-op
  FieldType storeType = new FieldType(TextField.TYPE_STORED);
  Function<String, Document> documentFactory =
      id -> {
        Document doc = new Document();
        doc.add(new StringField("id", id, random().nextBoolean() ? Store.YES : Store.NO));
        // occasionally change the order fields are added in
        if (random().nextInt(100) <= 5) {
          Collections.shuffle(storedFields, random());
        }
        for (String fieldName : storedFields) {
          if (random().nextBoolean()) {
            String s = TestUtil.randomUnicodeString(random(), 100);
            doc.add(newField(fieldName, s, storeType));
          }
        }
        for (SortField sortField : sortFields) {
          doc.add(
              new NumericDocValuesField(
                  sortField.getField(), TestUtil.nextInt(random(), 0, 10000)));
        }
        return doc;
      };
  Map<String, Document> docs = new HashMap<>();
  int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    String id = Integer.toString(i);
    docs.put(id, documentFactory.apply(id));
  }
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 5, 20));
  iwc.setIndexSort(new Sort(sortFields));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  List<String> addedIds = new ArrayList<>();
  // spot-checks a few random tracked ids: exactly one hit, and the hit's stored
  // fields match the stored fields of the expected Document
  Runnable verifyStoreFields =
      () -> {
        if (addedIds.isEmpty()) {
          return;
        }
        try (DirectoryReader reader = maybeWrapWithMergingReader(iw.getReader())) {
          IndexSearcher searcher = new IndexSearcher(reader);
          int iters = TestUtil.nextInt(random(), 1, 10);
          for (int i = 0; i < iters; i++) {
            String testID = addedIds.get(random().nextInt(addedIds.size()));
            if (VERBOSE) {
              System.out.println("TEST: test id=" + testID);
            }
            TopDocs hits = searcher.search(new TermQuery(new Term("id", testID)), 1);
            assertEquals(1, hits.totalHits.value);
            List<IndexableField> expectedFields =
                docs.get(testID).getFields().stream()
                    .filter(f -> f.fieldType().stored())
                    .collect(Collectors.toList());
            Document actualDoc = reader.document(hits.scoreDocs[0].doc);
            assertEquals(expectedFields.size(), actualDoc.getFields().size());
            for (IndexableField expectedField : expectedFields) {
              IndexableField[] actualFields = actualDoc.getFields(expectedField.name());
              assertEquals(1, actualFields.length);
              assertEquals(expectedField.stringValue(), actualFields[0].stringValue());
            }
          }
        } catch (IOException e) {
          throw new UncheckedIOException(e);
        }
      };
  final List<String> ids = new ArrayList<>(docs.keySet());
  Collections.shuffle(ids, random());
  for (String id : ids) {
    if (random().nextInt(100) < 5) {
      // add via foreign reader
      IndexWriterConfig otherIwc = newIndexWriterConfig();
      otherIwc.setIndexSort(new Sort(sortFields));
      try (Directory otherDir = newDirectory();
          RandomIndexWriter otherIw = new RandomIndexWriter(random(), otherDir, otherIwc)) {
        otherIw.addDocument(docs.get(id));
        try (DirectoryReader otherReader = otherIw.getReader()) {
          TestUtil.addIndexesSlowly(iw.w, otherReader);
        }
      }
    } else {
      // add normally
      iw.addDocument(docs.get(id));
    }
    addedIds.add(id);
    if (random().nextInt(100) < 5) {
      // randomly delete or update a previously added doc; the remove-by-index
      // already untracks it (the original also called addedIds.remove(deletingId)
      // afterwards, which was a dead no-op)
      String deletingId = addedIds.remove(random().nextInt(addedIds.size()));
      if (random().nextBoolean()) {
        iw.deleteDocuments(new TermQuery(new Term("id", deletingId)));
      } else {
        final Document newDoc = documentFactory.apply(deletingId);
        docs.put(deletingId, newDoc);
        iw.updateDocument(new Term("id", deletingId), newDoc);
        // fixed: re-track the id so updated docs keep being verified; previously
        // an updated id dropped out of addedIds forever
        addedIds.add(deletingId);
      }
    }
    if (random().nextInt(100) < 5) {
      verifyStoreFields.run();
    }
    if (random().nextInt(100) < 2) {
      iw.forceMerge(TestUtil.nextInt(random(), 1, 3));
    }
  }
  verifyStoreFields.run();
  iw.forceMerge(TestUtil.nextInt(random(), 1, 3));
  verifyStoreFields.run();
  IOUtils.close(iw, dir);
}
}