/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;

import java.io.IOException;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

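/**
 * Tests around segment merging in {@link IndexWriter}: addIndexes, forceMerge,
 * forceMergeDeletes, and closing a writer while merges are still running.
 */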
public class TestIndexWriterMerging extends LuceneTestCase {

  /**
   * Tests that index merging (specifically addIndexes(Directory...)) doesn't
   * change the index order of documents.
   */
  public void testLucene() throws IOException {
    int num = 100;

    Directory indexA = newDirectory();
    Directory indexB = newDirectory();

    fillIndex(random(), indexA, 0, num);
    boolean fail = verifyIndex(indexA, 0);
    if (fail) {
      fail("Index a is invalid");
    }

    fillIndex(random(), indexB, num, num);
    fail = verifyIndex(indexB, num);
    if (fail) {
      fail("Index b is invalid");
    }

    Directory merged = newDirectory();

    IndexWriter writer = new IndexWriter(
        merged,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy(2))
    );
    writer.addIndexes(indexA, indexB);
    writer.forceMerge(1);
    writer.close();

    fail = verifyIndex(merged, 0);
    assertFalse("The merged index is invalid", fail);

    indexA.close();
    indexB.close();
    merged.close();
  }

  private boolean verifyIndex(Directory directory, int startAt) throws IOException {
    boolean fail = false;
    IndexReader reader = DirectoryReader.open(directory);

    int max = reader.maxDoc();
    for (int i = 0; i < max; i++) {
      Document temp = reader.document(i);
      //System.out.println("doc "+i+"="+temp.getField("count").stringValue());
      // compare the index doc number to the value that it should be
      if (!temp.getField("count").stringValue().equals((i + startAt) + "")) {
        fail = true;
        System.out.println("Document " + (i + startAt) + " is returning document " + temp.getField("count").stringValue());
      }
    }
    reader.close();
    return fail;
  }

  private void fillIndex(Random random, Directory dir, int start, int numDocs) throws IOException {
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random))
            .setOpenMode(OpenMode.CREATE)
            .setMaxBufferedDocs(2)
            .setMergePolicy(newLogMergePolicy(2))
    );

    for (int i = start; i < (start + numDocs); i++) {
      Document temp = new Document();
      temp.add(newStringField("count", ("" + i), Field.Store.YES));
      writer.addDocument(temp);
    }
    writer.close();
  }

  // LUCENE-325: test forceMergeDeletes, when 2 singular merges
  // are required
  public void testForceMergeDeletes() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
        .setMaxBufferedDocs(2)
        .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
    Document document = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);

    FieldType customType1 = new FieldType(TextField.TYPE_STORED);
    customType1.setTokenized(false);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);

    Field idField = newStringField("id", "", Field.Store.NO);
    document.add(idField);
    Field storedField = newField("stored", "stored", customType);
    document.add(storedField);
    Field termVectorField = newField("termVector", "termVector", customType1);
    document.add(termVectorField);
    for (int i = 0; i < 10; i++) {
      idField.setStringValue("" + i);
      writer.addDocument(document);
    }
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    assertEquals(10, ir.maxDoc());
    assertEquals(10, ir.numDocs());
    ir.close();
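    // Delete a couple of documents with merging disabled, so the deletes are
    // still pending when forceMergeDeletes runs below.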
    IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(NoMergePolicy.INSTANCE);
    writer = new IndexWriter(dir, dontMergeConfig);
    writer.deleteDocuments(new Term("id", "0"));
    writer.deleteDocuments(new Term("id", "7"));
    writer.close();

    ir = DirectoryReader.open(dir);
    assertEquals(8, ir.numDocs());
    ir.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy()));
    assertEquals(8, writer.getDocStats().numDocs);
    assertEquals(10, writer.getDocStats().maxDoc);
    writer.forceMergeDeletes();
    assertEquals(8, writer.getDocStats().numDocs);
    writer.close();

    ir = DirectoryReader.open(dir);
    assertEquals(8, ir.maxDoc());
    assertEquals(8, ir.numDocs());
    ir.close();
    dir.close();
  }

  // LUCENE-325: test forceMergeDeletes, when many adjacent merges are required
  public void testForceMergeDeletes2() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(2)
            .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setMergePolicy(newLogMergePolicy(50))
    );

    Document document = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);

    FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
    customType1.setTokenized(false);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);

    Field storedField = newField("stored", "stored", customType);
    document.add(storedField);
    Field termVectorField = newField("termVector", "termVector", customType1);
    document.add(termVectorField);
    Field idField = newStringField("id", "", Field.Store.NO);
    document.add(idField);
    for (int i = 0; i < 98; i++) {
      idField.setStringValue("" + i);
      writer.addDocument(document);
    }
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    assertEquals(98, ir.maxDoc());
    assertEquals(98, ir.numDocs());
    ir.close();

    IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(NoMergePolicy.INSTANCE);
    writer = new IndexWriter(dir, dontMergeConfig);
    for (int i = 0; i < 98; i += 2) {
      writer.deleteDocuments(new Term("id", "" + i));
    }
    writer.close();

    ir = DirectoryReader.open(dir);
    assertEquals(49, ir.numDocs());
    ir.close();
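    // Reopen with a small merge factor so that reclaiming the deletes requires
    // a cascade of adjacent merges.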
    writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy(3))
    );
    assertEquals(49, writer.getDocStats().numDocs);
    writer.forceMergeDeletes();
    writer.close();

    ir = DirectoryReader.open(dir);
    assertEquals(49, ir.maxDoc());
    assertEquals(49, ir.numDocs());
    ir.close();
    dir.close();
  }

  // LUCENE-325: test forceMergeDeletes without waiting, when
  // many adjacent merges are required
  public void testForceMergeDeletes3() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(2)
            .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setMergePolicy(newLogMergePolicy(50))
    );

    FieldType customType = new FieldType();
    customType.setStored(true);

    FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
    customType1.setTokenized(false);
    customType1.setStoreTermVectors(true);
    customType1.setStoreTermVectorPositions(true);
    customType1.setStoreTermVectorOffsets(true);

    Document document = new Document();
    Field storedField = newField("stored", "stored", customType);
    document.add(storedField);
    Field termVectorField = newField("termVector", "termVector", customType1);
    document.add(termVectorField);
    Field idField = newStringField("id", "", Field.Store.NO);
    document.add(idField);
    for (int i = 0; i < 98; i++) {
      idField.setStringValue("" + i);
      writer.addDocument(document);
    }
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    assertEquals(98, ir.maxDoc());
    assertEquals(98, ir.numDocs());
    ir.close();

    IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(NoMergePolicy.INSTANCE);
    writer = new IndexWriter(dir, dontMergeConfig);
    for (int i = 0; i < 98; i += 2) {
      writer.deleteDocuments(new Term("id", "" + i));
    }
    writer.close();

    ir = DirectoryReader.open(dir);
    assertEquals(49, ir.numDocs());
    ir.close();

    writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy(3))
    );
    writer.forceMergeDeletes(false);
    writer.close();

    ir = DirectoryReader.open(dir);
    assertEquals(49, ir.maxDoc());
    assertEquals(49, ir.numDocs());
    ir.close();
    dir.close();
  }

  // Just intercepts all merges & verifies that we are never
  // merging a segment with >= 20 (maxMergeDocs) docs
  private static class MyMergeScheduler extends MergeScheduler {
    @Override
    public synchronized void merge(MergeSource mergeSource, MergeTrigger trigger) throws IOException {
      while (true) {
        MergePolicy.OneMerge merge = mergeSource.getNextMerge();
        if (merge == null) {
          break;
        }
        int numDocs = 0;
        for (int i = 0; i < merge.segments.size(); i++) {
          int maxDoc = merge.segments.get(i).info.maxDoc();
          numDocs += maxDoc;
          assertTrue(maxDoc < 20);
        }
        mergeSource.merge(merge);
        assertEquals(numDocs, merge.getMergeInfo().info.maxDoc());
      }
    }

    @Override
    public void close() {}
  }

  // LUCENE-1013
  public void testSetMaxMergeDocs() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergeScheduler(new MyMergeScheduler())
        .setMaxBufferedDocs(2)
        .setMergePolicy(newLogMergePolicy());
    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
    lmp.setMaxMergeDocs(20);
    lmp.setMergeFactor(2);
    IndexWriter iw = new IndexWriter(dir, conf);
    Document document = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);

    document.add(newField("tvtest", "a b c", customType));
    for (int i = 0; i < 177; i++) {
      iw.addDocument(document);
    }
    iw.close();
    dir.close();
  }

  @Slow
  public void testNoWaitClose() throws Throwable {
    Directory directory = newDirectory();

    final Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setTokenized(false);

    Field idField = newField("id", "", customType);
    doc.add(idField);

    for (int pass = 0; pass < 2; pass++) {
      if (VERBOSE) {
        System.out.println("TEST: pass=" + pass);
      }

      IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
          .setOpenMode(OpenMode.CREATE)
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy())
          .setCommitOnClose(false);
      if (pass == 1) {
        // The pass loop only runs passes 0 and 1, so test the serial merge
        // scheduler on the second pass.
        conf.setMergeScheduler(new SerialMergeScheduler());
      }

      IndexWriter writer = new IndexWriter(directory, conf);
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);

      for (int iter = 0; iter < atLeast(3); iter++) {
        if (VERBOSE) {
          System.out.println("TEST: iter=" + iter);
        }
        for (int j = 0; j < 199; j++) {
          idField.setStringValue(Integer.toString(iter * 201 + j));
          writer.addDocument(doc);
        }

        int delID = iter * 199;
        for (int j = 0; j < 20; j++) {
          writer.deleteDocuments(new Term("id", Integer.toString(delID)));
          delID += 5;
        }

        writer.commit();

        // Force a bunch of merge threads to kick off so we
        // stress out aborting them on close:
        ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2);

        final IndexWriter finalWriter = writer;
        final AtomicReference<Throwable> failure = new AtomicReference<>();
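        // Keep adding documents from another thread until close() (which does
        // not commit here) aborts the writer underneath it.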
        Thread t1 = new Thread() {
          @Override
          public void run() {
            boolean done = false;
            while (!done) {
              for (int i = 0; i < 100; i++) {
                try {
                  finalWriter.addDocument(doc);
                } catch (AlreadyClosedException e) {
                  done = true;
                  break;
                } catch (NullPointerException e) {
                  done = true;
                  break;
                } catch (Throwable e) {
                  e.printStackTrace(System.out);
                  failure.set(e);
                  done = true;
                  break;
                }
              }
              Thread.yield();
            }
          }
        };

        t1.start();

        writer.close();
        t1.join();

        if (failure.get() != null) {
          throw failure.get();
        }

        // Make sure reader can read
        IndexReader reader = DirectoryReader.open(directory);
        reader.close();

        // Reopen
        writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))
            .setOpenMode(OpenMode.APPEND)
            .setMergePolicy(newLogMergePolicy())
            .setCommitOnClose(false));
      }
      writer.close();
    }

    directory.close();
  }
}