package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.search.PhraseQuery;
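/**
 * Tests IndexWriter.addIndexesNoOptimize: adding external indexes into an
 * existing index without optimizing it. Covers the simple case, pending
 * buffered deletes, adding an index to itself, the tail-segment copy/merge
 * cases, and compound-file handling of copied segments.
 */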
public class TestAddIndexesNoOptimize extends LuceneTestCase {
public void testSimpleCase() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// two auxiliary directories
Directory aux = new RAMDirectory();
Directory aux2 = new RAMDirectory();
IndexWriter writer = null;
writer = newWriter(dir, true);
// add 100 documents
addDocs(writer, 100);
assertEquals(100, writer.maxDoc());
writer.close();
writer = newWriter(aux, true);
writer.setUseCompoundFile(false); // write this index without the compound file format
// add 40 documents in separate files
addDocs(writer, 40);
assertEquals(40, writer.maxDoc());
writer.close();
writer = newWriter(aux2, true);
// add 50 documents in compound files
addDocs2(writer, 50);
assertEquals(50, writer.maxDoc());
writer.close();
// test doc count before segments are merged
writer = newWriter(dir, false);
assertEquals(100, writer.maxDoc());
writer.addIndexesNoOptimize(new Directory[] { aux, aux2 });
assertEquals(190, writer.maxDoc());
writer.close();
// make sure the old index is correct
verifyNumDocs(aux, 40);
// make sure the new index is correct
verifyNumDocs(dir, 190);
// now add another set in.
Directory aux3 = new RAMDirectory();
writer = newWriter(aux3, true);
// add 40 documents
addDocs(writer, 40);
assertEquals(40, writer.maxDoc());
writer.close();
// test doc count before segments are merged/index is optimized
writer = newWriter(dir, false);
assertEquals(190, writer.maxDoc());
writer.addIndexesNoOptimize(new Directory[] { aux3 });
assertEquals(230, writer.maxDoc());
writer.close();
// make sure the new index is correct
verifyNumDocs(dir, 230);
verifyTermDocs(dir, new Term("content", "aaa"), 180);
verifyTermDocs(dir, new Term("content", "bbb"), 50);
// now optimize it.
writer = newWriter(dir, false);
writer.optimize();
writer.close();
// make sure the new index is correct
verifyNumDocs(dir, 230);
verifyTermDocs(dir, new Term("content", "aaa"), 180);
verifyTermDocs(dir, new Term("content", "bbb"), 50);
// now add a single document
Directory aux4 = new RAMDirectory();
writer = newWriter(aux4, true);
addDocs2(writer, 1);
writer.close();
writer = newWriter(dir, false);
assertEquals(230, writer.maxDoc());
writer.addIndexesNoOptimize(new Directory[] { aux4 });
assertEquals(231, writer.maxDoc());
writer.close();
verifyNumDocs(dir, 231);
verifyTermDocs(dir, new Term("content", "bbb"), 51);
}
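// The next three tests exercise addIndexesNoOptimize together with buffered
// updates and deletes; they differ only in whether the external index is
// added before the updates, after the updates but before the delete, or
// after the delete.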
public void testWithPendingDeletes() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, false);
writer.addIndexesNoOptimize(new Directory[] {aux});
// Adds 10 docs, then replaces them with another 10
// docs, so 10 pending deletes:
for (int i = 0; i < 20; i++) {
Document doc = new Document();
doc.add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new Field("content", "bbb " + i, Field.Store.NO,
Field.Index.ANALYZED));
writer.updateDocument(new Term("id", "" + (i%10)), doc);
}
// Deletes one of the 10 added docs, leaving 9:
PhraseQuery q = new PhraseQuery();
q.add(new Term("content", "bbb"));
q.add(new Term("content", "14"));
writer.deleteDocuments(q);
writer.optimize();
writer.commit();
verifyNumDocs(dir, 1039);
verifyTermDocs(dir, new Term("content", "aaa"), 1030);
verifyTermDocs(dir, new Term("content", "bbb"), 9);
writer.close();
dir.close();
aux.close();
}
public void testWithPendingDeletes2() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, false);
// Adds 10 docs, then replaces them with another 10
// docs, so 10 pending deletes:
for (int i = 0; i < 20; i++) {
Document doc = new Document();
doc.add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new Field("content", "bbb " + i, Field.Store.NO,
Field.Index.ANALYZED));
writer.updateDocument(new Term("id", "" + (i%10)), doc);
}
writer.addIndexesNoOptimize(new Directory[] {aux});
// Deletes one of the 10 added docs, leaving 9:
PhraseQuery q = new PhraseQuery();
q.add(new Term("content", "bbb"));
q.add(new Term("content", "14"));
writer.deleteDocuments(q);
writer.optimize();
writer.commit();
verifyNumDocs(dir, 1039);
verifyTermDocs(dir, new Term("content", "aaa"), 1030);
verifyTermDocs(dir, new Term("content", "bbb"), 9);
writer.close();
dir.close();
aux.close();
}
public void testWithPendingDeletes3() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, false);
// Adds 10 docs, then replaces them with another 10
// docs, so 10 pending deletes:
for (int i = 0; i < 20; i++) {
Document doc = new Document();
doc.add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new Field("content", "bbb " + i, Field.Store.NO,
Field.Index.ANALYZED));
writer.updateDocument(new Term("id", "" + (i%10)), doc);
}
// Deletes one of the 10 added docs, leaving 9:
PhraseQuery q = new PhraseQuery();
q.add(new Term("content", "bbb"));
q.add(new Term("content", "14"));
writer.deleteDocuments(q);
writer.addIndexesNoOptimize(new Directory[] {aux});
writer.optimize();
writer.commit();
verifyNumDocs(dir, 1039);
verifyTermDocs(dir, new Term("content", "aaa"), 1030);
verifyTermDocs(dir, new Term("content", "bbb"), 9);
writer.close();
dir.close();
aux.close();
}
// case 0: add self, expect exception
public void testAddSelf() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
IndexWriter writer = null;
writer = newWriter(dir, true);
// add 100 documents
addDocs(writer, 100);
assertEquals(100, writer.maxDoc());
writer.close();
writer = newWriter(aux, true);
writer.setUseCompoundFile(false); // write this index without the compound file format
writer.setMaxBufferedDocs(1000);
// add 40 documents in separate files
addDocs(writer, 40);
writer.close();
// recreate aux and add 100 documents in separate files
writer = newWriter(aux, true);
writer.setUseCompoundFile(false); // write this index without the compound file format
writer.setMaxBufferedDocs(1000);
addDocs(writer, 100);
writer.close();
writer = newWriter(dir, false);
try {
// cannot add self
writer.addIndexesNoOptimize(new Directory[] { aux, dir });
fail("expected IllegalArgumentException: cannot add an index to itself");
}
catch (IllegalArgumentException e) {
assertEquals(100, writer.maxDoc());
}
writer.close();
// make sure the index is correct
verifyNumDocs(dir, 100);
}
// in all the remaining tests, make the doc count of the oldest segment
// in dir large so that it is never merged in addIndexesNoOptimize()
// case 1: no tail segments
public void testNoTailSegments() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, false);
writer.setMaxBufferedDocs(10);
writer.setMergeFactor(4);
addDocs(writer, 10);
writer.addIndexesNoOptimize(new Directory[] { aux });
assertEquals(1040, writer.maxDoc());
assertEquals(2, writer.getSegmentCount());
assertEquals(1000, writer.getDocCount(0));
writer.close();
// make sure the index is correct
verifyNumDocs(dir, 1040);
}
// case 2: tail segments, invariants hold, no copy
public void testNoCopySegments() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, false);
writer.setMaxBufferedDocs(9);
writer.setMergeFactor(4);
addDocs(writer, 2);
writer.addIndexesNoOptimize(new Directory[] { aux });
assertEquals(1032, writer.maxDoc());
assertEquals(2, writer.getSegmentCount());
assertEquals(1000, writer.getDocCount(0));
writer.close();
// make sure the index is correct
verifyNumDocs(dir, 1032);
}
// case 3: tail segments, invariants hold, copy, invariants hold
public void testNoMergeAfterCopy() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(dir, false);
writer.setMaxBufferedDocs(10);
writer.setMergeFactor(4);
writer.addIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
assertEquals(1060, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
// make sure the index is correct
verifyNumDocs(dir, 1060);
}
// case 4: tail segments, invariants hold, copy, invariants do not hold
public void testMergeAfterCopy() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
setUpDirs(dir, aux);
IndexReader reader = IndexReader.open(aux, false);
for (int i = 0; i < 20; i++) {
reader.deleteDocument(i);
}
assertEquals(10, reader.numDocs());
reader.close();
IndexWriter writer = newWriter(dir, false);
writer.setMaxBufferedDocs(4);
writer.setMergeFactor(4);
writer.addIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
assertEquals(1020, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
// make sure the index is correct
verifyNumDocs(dir, 1020);
}
// case 5: tail segments, invariants do not hold
public void testMoreMerges() throws IOException {
// main directory
Directory dir = new RAMDirectory();
// auxiliary directory
Directory aux = new RAMDirectory();
Directory aux2 = new RAMDirectory();
setUpDirs(dir, aux);
IndexWriter writer = newWriter(aux2, true);
writer.setMaxBufferedDocs(100);
writer.setMergeFactor(10);
writer.addIndexesNoOptimize(new Directory[] { aux });
assertEquals(30, writer.maxDoc());
assertEquals(3, writer.getSegmentCount());
writer.close();
IndexReader reader = IndexReader.open(aux, false);
for (int i = 0; i < 27; i++) {
reader.deleteDocument(i);
}
assertEquals(3, reader.numDocs());
reader.close();
reader = IndexReader.open(aux2, false);
for (int i = 0; i < 8; i++) {
reader.deleteDocument(i);
}
assertEquals(22, reader.numDocs());
reader.close();
writer = newWriter(dir, false);
writer.setMaxBufferedDocs(6);
writer.setMergeFactor(4);
writer.addIndexesNoOptimize(new Directory[] { aux, aux2 });
assertEquals(1025, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
// make sure the index is correct
verifyNumDocs(dir, 1025);
}
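// Creates an IndexWriter over dir using LogDocMergePolicy, so merges are
// selected by document count; this keeps the segment counts asserted in
// these tests predictable.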
private IndexWriter newWriter(Directory dir, boolean create)
throws IOException {
final IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
writer.setMergePolicy(new LogDocMergePolicy());
return writer;
}
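// Adds numDocs documents whose single "content" field holds the term "aaa".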
private void addDocs(IndexWriter writer, int numDocs) throws IOException {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new Field("content", "aaa", Field.Store.NO,
Field.Index.ANALYZED));
writer.addDocument(doc);
}
}
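// Adds numDocs documents whose single "content" field holds the term "bbb".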
private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new Field("content", "bbb", Field.Store.NO,
Field.Index.ANALYZED));
writer.addDocument(doc);
}
}
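// Checks, via a fresh read-only reader, that the index holds exactly numDocs
// documents with no pending deletions (maxDoc == numDocs).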
private void verifyNumDocs(Directory dir, int numDocs) throws IOException {
IndexReader reader = IndexReader.open(dir, true);
assertEquals(numDocs, reader.maxDoc());
assertEquals(numDocs, reader.numDocs());
reader.close();
}
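// Counts the documents containing term by walking a TermDocs enumeration and
// asserts the count matches numDocs.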
private void verifyTermDocs(Directory dir, Term term, int numDocs)
throws IOException {
IndexReader reader = IndexReader.open(dir, true);
TermDocs termDocs = reader.termDocs(term);
int count = 0;
while (termDocs.next())
count++;
assertEquals(numDocs, count);
reader.close();
}
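// Shared fixture: dir receives 1000 documents in a single segment; aux
// receives 30 documents in 3 separate non-compound segments.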
private void setUpDirs(Directory dir, Directory aux) throws IOException {
IndexWriter writer = null;
writer = newWriter(dir, true);
writer.setMaxBufferedDocs(1000);
// add 1000 documents in 1 segment
addDocs(writer, 1000);
assertEquals(1000, writer.maxDoc());
assertEquals(1, writer.getSegmentCount());
writer.close();
writer = newWriter(aux, true);
writer.setUseCompoundFile(false); // write this index without the compound file format
writer.setMaxBufferedDocs(100);
writer.setMergeFactor(10);
// add 30 documents in 3 segments
for (int i = 0; i < 3; i++) {
addDocs(writer, 10);
writer.close();
writer = newWriter(aux, false);
writer.setUseCompoundFile(false); // write this index without the compound file format
writer.setMaxBufferedDocs(100);
writer.setMergeFactor(10);
}
assertEquals(30, writer.maxDoc());
assertEquals(3, writer.getSegmentCount());
writer.close();
}
// LUCENE-1270
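// Builds an index containing a mix of segment sizes (docs with term vectors
// plus stored-only docs), then copies it with addIndexesNoOptimize into a
// writer configured to merge aggressively (tiny minMergeMB, mergeFactor 4,
// SerialMergeScheduler), verifying that close() completes.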
public void testHangOnClose() throws IOException {
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
writer.setMergePolicy(new LogByteSizeMergePolicy());
writer.setMaxBufferedDocs(5);
writer.setUseCompoundFile(false);
writer.setMergeFactor(100);
Document doc = new Document();
doc.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
for(int i=0;i<60;i++)
writer.addDocument(doc);
writer.setMaxBufferedDocs(200);
Document doc2 = new Document();
doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
Field.Index.NO));
doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
Field.Index.NO));
doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
Field.Index.NO));
doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
Field.Index.NO));
for(int i=0;i<10;i++)
writer.addDocument(doc2);
writer.close();
Directory dir2 = new MockRAMDirectory();
writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
lmp.setMinMergeMB(0.0001);
writer.setMergePolicy(lmp);
writer.setMergeFactor(4);
writer.setUseCompoundFile(false);
writer.setMergeScheduler(new SerialMergeScheduler());
writer.addIndexesNoOptimize(new Directory[] {dir});
writer.close();
dir.close();
dir2.close();
}
// LUCENE-1642: make sure the compound-file setting (CFS) of the destination
// IndexWriter is respected when copying tail segments
public void testTargetCFS() throws IOException {
Directory dir = new RAMDirectory();
IndexWriter writer = newWriter(dir, true);
writer.setUseCompoundFile(false);
addDocs(writer, 1);
writer.close();
Directory other = new RAMDirectory();
writer = newWriter(other, true);
writer.setUseCompoundFile(true);
writer.addIndexesNoOptimize(new Directory[] {dir});
assertTrue(writer.newestSegment().getUseCompoundFile());
writer.close();
}
}