// source blob: 852ce532ec439b19a633ce88fa3205d74afe421e
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
 * Tests for {@link IndexWriter}'s commit semantics: point-in-time reader
 * isolation ("commit on close"), two-phase commit via {@code prepareCommit()},
 * rollback, commit user data, and thread-safety of {@code commit()}.
 *
 * NOTE: most tests here rely on the exact interleaving of writer and reader
 * operations — reordering statements changes what is being verified.
 */
public class TestIndexWriterCommit extends LuceneTestCase {
  /*
   * Simple test for "commit on close": open writer then
   * add a bunch of docs, making sure reader does not see
   * these docs until writer is closed.
   */
  public void testCommitOnClose() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    // Seed the index with 14 docs and commit them by closing the writer.
    for (int i = 0; i < 14; i++) {
      TestIndexWriter.addDoc(writer);
    }
    writer.close();
    Term searchTerm = new Term("content", "aaa");
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    assertEquals("first number of hits", 14, hits.length);
    reader.close();
    // Hold an open reader on the 14-doc commit while a new writer adds more docs.
    reader = DirectoryReader.open(dir);
    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    for(int i=0;i<3;i++) {
      for(int j=0;j<11;j++) {
        TestIndexWriter.addDoc(writer);
      }
      // A freshly opened reader must still see only the last commit (14 docs),
      // since the writer has not committed its 11*i new docs yet.
      IndexReader r = DirectoryReader.open(dir);
      searcher = newSearcher(r);
      hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
      assertEquals("reader incorrectly sees changes from writer", 14, hits.length);
      r.close();
      // No commit has happened, so the original reader is still current.
      assertTrue("reader should have still been current", reader.isCurrent());
    }
    // Now, close the writer:
    writer.close();
    // Closing committed 33 more docs, so the old reader is stale now.
    assertFalse("reader should not be current now", reader.isCurrent());
    IndexReader r = DirectoryReader.open(dir);
    searcher = newSearcher(r);
    hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    // 14 original + 3*11 added = 47 docs visible after the close-commit.
    assertEquals("reader did not see changes after writer was closed", 47, hits.length);
    r.close();
    reader.close();
    dir.close();
  }
  /*
   * Simple test for "commit on close": open writer, then
   * add a bunch of docs, making sure reader does not see
   * them until writer has closed. Then instead of
   * closing the writer, call abort and verify reader sees
   * nothing was added. Then verify we can open the index
   * and add docs to it.
   */
  public void testCommitOnCloseAbort() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                .setMaxBufferedDocs(10));
    // Commit 14 docs as the baseline the index must roll back to.
    for (int i = 0; i < 14; i++) {
      TestIndexWriter.addDoc(writer);
    }
    writer.close();
    Term searchTerm = new Term("content", "aaa");
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(reader);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    assertEquals("first number of hits", 14, hits.length);
    reader.close();
    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                    .setOpenMode(OpenMode.APPEND)
                                    .setMaxBufferedDocs(10));
    // Add 17 docs and delete everything — none of this is committed.
    for(int j=0;j<17;j++) {
      TestIndexWriter.addDoc(writer);
    }
    // Delete all docs:
    writer.deleteDocuments(searchTerm);
    reader = DirectoryReader.open(dir);
    searcher = newSearcher(reader);
    hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    // Uncommitted adds/deletes must be invisible to a new reader.
    assertEquals("reader incorrectly sees changes from writer", 14, hits.length);
    reader.close();
    // Now, close the writer:
    writer.rollback();
    // Rollback must discard all uncommitted segment files.
    TestIndexWriter.assertNoUnreferencedFiles(dir, "unreferenced files remain after rollback()");
    reader = DirectoryReader.open(dir);
    searcher = newSearcher(reader);
    hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    assertEquals("saw changes after writer.abort", 14, hits.length);
    reader.close();
    // Now make sure we can re-open the index, add docs,
    // and all is good:
    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                    .setOpenMode(OpenMode.APPEND)
                                    .setMaxBufferedDocs(10));
    // 12 rounds of 17 uncommitted docs; readers keep seeing only the 14.
    for(int i=0;i<12;i++) {
      for(int j=0;j<17;j++) {
        TestIndexWriter.addDoc(writer);
      }
      IndexReader r = DirectoryReader.open(dir);
      searcher = newSearcher(r);
      hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
      assertEquals("reader incorrectly sees changes from writer", 14, hits.length);
      r.close();
    }
    writer.close();
    IndexReader r = DirectoryReader.open(dir);
    searcher = newSearcher(r);
    hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs;
    // 14 baseline + 12*17 = 218 docs after the final close-commit.
    assertEquals("didn't see changes after close", 218, hits.length);
    r.close();
    dir.close();
  }
  /*
   * Verify that a writer with "commit on close" indeed
   * cleans up the temp segments created after opening
   * that are not referenced by the starting segments
   * file. We check this by using MockDirectoryWrapper to
   * measure max temp disk space used.
   */
  // TODO: can this write less docs/indexes?
  @Nightly
  public void testCommitOnCloseDiskUsage() throws IOException {
    // MemoryCodec, since it uses FST, is not necessarily
    // "additive", ie if you add up N small FSTs, then merge
    // them, the merged result can easily be larger than the
    // sum because the merged FST may use array encoding for
    // some arcs (which uses more space):
    final String idFormat = TestUtil.getPostingsFormat("id");
    final String contentFormat = TestUtil.getPostingsFormat("content");
    MockDirectoryWrapper dir = newMockDirectory();
    Analyzer analyzer;
    // Randomly pick between no-payload and fixed-length-payload analyzers so
    // the disk-usage bound is exercised for both index shapes.
    if (random().nextBoolean()) {
      // no payloads
      analyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
          return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, true));
        }
      };
    } else {
      // fixed length payloads
      final int length = random().nextInt(200);
      analyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
          Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
          return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(random(), tokenizer, length));
        }
      };
    }
    // Build a small starting index (30 docs) and commit it.
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(analyzer)
          .setMaxBufferedDocs(10)
          .setReaderPooling(false)
          .setMergePolicy(newLogMergePolicy(10))
    );
    for(int j=0;j<30;j++) {
      TestIndexWriter.addDocWithIndex(writer, j);
    }
    writer.close();
    // Start tracking peak disk usage from the committed baseline.
    dir.resetMaxUsedSizeInBytes();
    dir.setTrackDiskUsage(true);
    long startDiskUsage = dir.getMaxUsedSizeInBytes();
    writer = new IndexWriter(
        dir,
        newIndexWriterConfig(analyzer)
          .setOpenMode(OpenMode.APPEND)
          .setMaxBufferedDocs(10)
          .setMergeScheduler(new SerialMergeScheduler())
          .setReaderPooling(false)
          .setMergePolicy(newLogMergePolicy(10))
    );
    // Grow the index ~50x without committing; peak transient usage is bounded
    // only if the deleter removes intermediate (unreferenced) segments.
    for(int j=0;j<1470;j++) {
      TestIndexWriter.addDocWithIndex(writer, j);
    }
    long midDiskUsage = dir.getMaxUsedSizeInBytes();
    dir.resetMaxUsedSizeInBytes();
    writer.forceMerge(1);
    writer.close();
    DirectoryReader.open(dir).close();
    long endDiskUsage = dir.getMaxUsedSizeInBytes();
    // Ending index is 50X as large as starting index; due
    // to 3X disk usage normally we allow 150X max
    // transient usage. If something is wrong w/ deleter
    // and it doesn't delete intermediate segments then it
    // will exceed this 150X:
    // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
    assertTrue("writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage*150),
               midDiskUsage < 150*startDiskUsage);
    assertTrue("writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage*150),
               endDiskUsage < 150*startDiskUsage);
    dir.close();
  }
  /*
   * Verify that calling forceMerge when writer is open for
   * "commit on close" works correctly both for rollback()
   * and close().
   */
  public void testCommitOnCloseForceMerge() throws IOException {
    Directory dir = newDirectory();
    // Create a deliberately multi-segment index (17 docs, flush every 10).
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(10)
          .setMergePolicy(newLogMergePolicy(10))
    );
    for(int j=0;j<17;j++) {
      TestIndexWriter.addDocWithIndex(writer, j);
    }
    writer.close();
    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                    .setOpenMode(OpenMode.APPEND));
    writer.forceMerge(1);
    // Open a reader before closing (commiting) the writer:
    DirectoryReader reader = DirectoryReader.open(dir);
    // Reader should see index as multi-seg at this
    // point:
    assertTrue("Reader incorrectly sees one segment", reader.leaves().size() > 1);
    reader.close();
    // Abort the writer:
    writer.rollback();
    TestIndexWriter.assertNoUnreferencedFiles(dir, "aborted writer after forceMerge");
    // Open a reader after aborting writer:
    reader = DirectoryReader.open(dir);
    // Reader should still see index as multi-segment
    assertTrue("Reader incorrectly sees one segment", reader.leaves().size() > 1);
    reader.close();
    if (VERBOSE) {
      System.out.println("TEST: do real full merge");
    }
    // This time actually commit the merge by closing the writer.
    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                    .setOpenMode(OpenMode.APPEND));
    writer.forceMerge(1);
    writer.close();
    if (VERBOSE) {
      System.out.println("TEST: writer closed");
    }
    TestIndexWriter.assertNoUnreferencedFiles(dir, "aborted writer after forceMerge");
    // Open a reader after aborting writer:
    reader = DirectoryReader.open(dir);
    // Reader should see index as one segment
    assertEquals("Reader incorrectly sees more than one segment", 1, reader.leaves().size());
    reader.close();
    dir.close();
  }
  // LUCENE-2095: make sure with multiple threads commit
  // doesn't return until all changes are in fact in the
  // index
  public void testCommitThreadSafety() throws Throwable {
    final int NUM_THREADS = 5;
    final double RUN_SEC = 0.5;
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                                       .setMergePolicy(newLogMergePolicy()));
    TestUtil.reduceOpenFiles(w.w);
    w.commit();
    final AtomicBoolean failed = new AtomicBoolean();
    Thread[] threads = new Thread[NUM_THREADS];
    final long endTime = System.currentTimeMillis()+((long) (RUN_SEC*1000));
    for(int i=0;i<NUM_THREADS;i++) {
      final int finalI = i;
      // Each thread adds a doc with a thread-unique term, commits, reopens its
      // reader, and asserts the just-committed term is visible — i.e. commit()
      // did not return before the change was durably in the index.
      threads[i] = new Thread() {
          @Override
          public void run() {
            try {
              final Document doc = new Document();
              DirectoryReader r = DirectoryReader.open(dir);
              Field f = newStringField("f", "", Field.Store.NO);
              doc.add(f);
              int count = 0;
              do {
                if (failed.get()) break;
                for(int j=0;j<10;j++) {
                  // Term value is unique per thread and per iteration.
                  final String s = finalI + "_" + String.valueOf(count++);
                  f.setStringValue(s);
                  w.addDocument(doc);
                  w.commit();
                  // After commit, openIfChanged must return a new reader …
                  DirectoryReader r2 = DirectoryReader.openIfChanged(r);
                  assertNotNull(r2);
                  assertTrue(r2 != r);
                  r.close();
                  r = r2;
                  // … and that reader must see exactly one doc with our term.
                  assertEquals("term=f:" + s + "; r=" + r, 1, r.docFreq(new Term("f", s)));
                }
              } while(System.currentTimeMillis() < endTime);
              r.close();
            } catch (Throwable t) {
              // Record the failure so sibling threads stop early; the join +
              // assertFalse below surfaces it to the test runner.
              failed.set(true);
              throw new RuntimeException(t);
            }
          }
        };
      threads[i].start();
    }
    for(int i=0;i<NUM_THREADS;i++) {
      threads[i].join();
    }
    assertFalse(failed.get());
    w.close();
    dir.close();
  }
  // LUCENE-1044: test writer.commit() when ac=false
  public void testForceCommit() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(5))
    );
    // Initial empty commit so a reader can be opened.
    writer.commit();
    for (int i = 0; i < 23; i++)
      TestIndexWriter.addDoc(writer);
    DirectoryReader reader = DirectoryReader.open(dir);
    // Uncommitted docs are invisible.
    assertEquals(0, reader.numDocs());
    writer.commit();
    // openIfChanged picks up the new commit; the old reader stays point-in-time.
    DirectoryReader reader2 = DirectoryReader.openIfChanged(reader);
    assertNotNull(reader2);
    assertEquals(0, reader.numDocs());
    assertEquals(23, reader2.numDocs());
    reader.close();
    for (int i = 0; i < 17; i++)
      TestIndexWriter.addDoc(writer);
    // reader2 is also point-in-time: still 23 despite 17 uncommitted adds.
    assertEquals(23, reader2.numDocs());
    reader2.close();
    reader = DirectoryReader.open(dir);
    assertEquals(23, reader.numDocs());
    reader.close();
    writer.commit();
    reader = DirectoryReader.open(dir);
    // 23 + 17 = 40 after the explicit commit.
    assertEquals(40, reader.numDocs());
    reader.close();
    writer.close();
    dir.close();
  }
  /**
   * Verifies that opening a writer on an older IndexCommit ("first") and
   * committing from there ("third") does not delete a later commit ("second")
   * when NoDeletionPolicy is in effect.
   */
  public void testFutureCommit() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                           .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
    Document doc = new Document();
    w.addDocument(doc);
    // commit to "first"
    Map<String,String> commitData = new HashMap<>();
    commitData.put("tag", "first");
    w.setLiveCommitData(commitData.entrySet());
    w.commit();
    // commit to "second"
    w.addDocument(doc);
    commitData.put("tag", "second");
    w.setLiveCommitData(commitData.entrySet());
    w.close();
    // open "first" with IndexWriter
    IndexCommit commit = null;
    for(IndexCommit c : DirectoryReader.listCommits(dir)) {
      if (c.getUserData().get("tag").equals("first")) {
        commit = c;
        break;
      }
    }
    assertNotNull(commit);
    w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                               .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)
                               .setIndexCommit(commit));
    // Writer opened on "first" sees only the 1 doc from that commit.
    assertEquals(1, w.getDocStats().numDocs);
    // commit IndexWriter to "third"
    w.addDocument(doc);
    commitData.put("tag", "third");
    w.setLiveCommitData(commitData.entrySet());
    w.close();
    // make sure "second" commit is still there
    commit = null;
    for(IndexCommit c : DirectoryReader.listCommits(dir)) {
      if (c.getUserData().get("tag").equals("second")) {
        commit = c;
        break;
      }
    }
    assertNotNull(commit);
    dir.close();
  }
  public void testZeroCommits() throws Exception {
    // Tests that if we don't call commit(), the directory has 0 commits. This has
    // changed since LUCENE-2386, where before IW would always commit on a fresh
    // new index.
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    // Before any commit, there is no segments file to list.
    expectThrows(IndexNotFoundException.class, () -> {
      DirectoryReader.listCommits(dir);
    });
    // No changes still should generate a commit, because it's a new index.
    writer.close();
    assertEquals("expected 1 commits!", 1, DirectoryReader.listCommits(dir).size());
    dir.close();
  }
  // LUCENE-1274: test writer.prepareCommit()
  public void testPrepareCommit() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(5))
    );
    writer.commit();
    for (int i = 0; i < 23; i++)
      TestIndexWriter.addDoc(writer);
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    // Phase 1 of two-phase commit: changes are flushed but NOT yet visible.
    writer.prepareCommit();
    IndexReader reader2 = DirectoryReader.open(dir);
    assertEquals(0, reader2.numDocs());
    // Phase 2: commit() makes the prepared changes visible.
    writer.commit();
    IndexReader reader3 = DirectoryReader.openIfChanged(reader);
    assertNotNull(reader3);
    assertEquals(0, reader.numDocs());
    assertEquals(0, reader2.numDocs());
    assertEquals(23, reader3.numDocs());
    reader.close();
    reader2.close();
    for (int i = 0; i < 17; i++)
      TestIndexWriter.addDoc(writer);
    assertEquals(23, reader3.numDocs());
    reader3.close();
    reader = DirectoryReader.open(dir);
    assertEquals(23, reader.numDocs());
    reader.close();
    // Again: prepareCommit alone must not expose the 17 new docs …
    writer.prepareCommit();
    reader = DirectoryReader.open(dir);
    assertEquals(23, reader.numDocs());
    reader.close();
    // … until the finishing commit().
    writer.commit();
    reader = DirectoryReader.open(dir);
    assertEquals(40, reader.numDocs());
    reader.close();
    writer.close();
    dir.close();
  }
  // LUCENE-1274: test writer.prepareCommit()
  public void testPrepareCommitRollback() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(5))
    );
    writer.commit();
    for (int i = 0; i < 23; i++) {
      TestIndexWriter.addDoc(writer);
    }
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    writer.prepareCommit();
    IndexReader reader2 = DirectoryReader.open(dir);
    assertEquals(0, reader2.numDocs());
    // Rollback after prepareCommit must abandon the prepared commit entirely.
    writer.rollback();
    // Index is unchanged, so openIfChanged returns null.
    IndexReader reader3 = DirectoryReader.openIfChanged(reader);
    assertNull(reader3);
    assertEquals(0, reader.numDocs());
    assertEquals(0, reader2.numDocs());
    reader.close();
    reader2.close();
    // System.out.println("TEST: after rollback: " + Arrays.toString(dir.listAll()));
    // Verify the index is still usable after the aborted two-phase commit.
    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    for (int i = 0; i < 17; i++) {
      TestIndexWriter.addDoc(writer);
    }
    reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    reader.close();
    writer.prepareCommit();
    reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    reader.close();
    writer.commit();
    reader = DirectoryReader.open(dir);
    assertEquals(17, reader.numDocs());
    reader.close();
    writer.close();
    dir.close();
  }
  // LUCENE-1274
  public void testPrepareCommitNoChanges() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    // prepareCommit + commit with zero changes must still yield a valid,
    // openable (empty) index.
    writer.prepareCommit();
    writer.commit();
    writer.close();
    IndexReader reader = DirectoryReader.open(dir);
    assertEquals(0, reader.numDocs());
    reader.close();
    dir.close();
  }
  // LUCENE-1382
  public void testCommitUserData() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                           .setMaxBufferedDocs(2));
    for(int j=0;j<17;j++)
      TestIndexWriter.addDoc(w);
    w.close();
    DirectoryReader r = DirectoryReader.open(dir);
    // commit(Map) never called for this index
    assertEquals(0, r.getIndexCommit().getUserData().size());
    r.close();
    w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                               .setMaxBufferedDocs(2));
    for(int j=0;j<17;j++)
      TestIndexWriter.addDoc(w);
    // Attach user data; close() commits it along with the docs.
    Map<String,String> data = new HashMap<>();
    data.put("label", "test1");
    w.setLiveCommitData(data.entrySet());
    w.close();
    r = DirectoryReader.open(dir);
    assertEquals("test1", r.getIndexCommit().getUserData().get("label"));
    r.close();
    // Sanity: a force-merge on the index with user data still works.
    w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    w.forceMerge(1);
    w.close();
    dir.close();
  }
  public void testPrepareCommitThenClose() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    w.addDocument(new Document());
    w.prepareCommit();
    // close() is illegal while a commit is pending …
    expectThrows(IllegalStateException.class, () -> {
      w.close();
    });
    // … but finishing the commit and then closing works, and the doc is kept.
    w.commit();
    w.close();
    DirectoryReader r = DirectoryReader.open(dir);
    assertEquals(1, r.maxDoc());
    r.close();
    dir.close();
  }
  // LUCENE-7335: make sure commit data is late binding
  public void testCommitDataIsLive() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    w.addDocument(new Document());
    final Map<String,String> commitData = new HashMap<>();
    commitData.put("foo", "bar");
    // make sure "foo" / "bar" doesn't take
    w.setLiveCommitData(commitData.entrySet());
    // The writer holds a live view of the map, so mutating it before commit
    // replaces the data that will be written.
    commitData.clear();
    commitData.put("boo", "baz");
    // this finally does the commit, and should burn "boo" / "baz"
    w.close();
    List<IndexCommit> commits = DirectoryReader.listCommits(dir);
    assertEquals(1, commits.size());
    IndexCommit commit = commits.get(0);
    Map<String,String> data = commit.getUserData();
    assertEquals(1, data.size());
    assertEquals("baz", data.get("boo"));
    dir.close();
  }
}