Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 515500)
+++ CHANGES.txt (working copy)
@@ -20,6 +20,15 @@
classes, package-private again (they were unnecessarily made public
as part of LUCENE-701). (Mike McCandless)
+ 3. LUCENE-710: added optional autoCommit boolean to IndexWriter
+ constructors. When this is false, index changes are not committed
+ until the writer is closed. This gives explicit control over when
+ a reader will see the changes. Also added optional custom
+ deletion policy to explicitly control when prior commits are
+ removed from the index. This is intended to allow applications to
+ share an index over NFS by customizing when prior commits are
+ deleted. (Mike McCandless)
+
Bug fixes
1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist. (Doron Cohen)
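Illustration (not part of the patch): a minimal Java sketch of how an application might use the new autoCommit flag and a custom IndexDeletionPolicy described in the CHANGES entry above. The constructor signature (Directory, autoCommit, Analyzer, create, policy) and the IndexDeletionPolicy/IndexCommitPoint methods mirror those exercised by the tests in this patch; the class names AutoCommitExample and KeepLastCommitPolicy, and the assumption that IndexDeletionPolicy and IndexCommitPoint live in org.apache.lucene.index, are illustrative only.

import java.util.List;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexCommitPoint;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class AutoCommitExample {

  // Hypothetical policy: keep only the most recent commit, deleting all
  // older commit points both at startup and whenever a new commit is made
  // (roughly what KeepOnlyLastCommitDeletionPolicy is expected to do).
  static class KeepLastCommitPolicy implements IndexDeletionPolicy {
    public void onInit(List commits) { deleteAllButLast(commits); }
    public void onCommit(List commits) { deleteAllButLast(commits); }
    private void deleteAllButLast(List commits) {
      for (int i = 0; i < commits.size() - 1; i++) {
        ((IndexCommitPoint) commits.get(i)).delete();
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // autoCommit=false: readers will not see any of these changes until
    // the writer is closed (the "commit on close" behavior tested below).
    IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(),
                                         true, new KeepLastCommitPolicy());
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    // The commit happens here; afterwards the deletion policy decides
    // which prior commit points may be removed.
    writer.close();
  }
}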
Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 515500)
+++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (working copy)
@@ -68,7 +68,7 @@
if (realUsage > dir.maxUsedSize) {
dir.maxUsedSize = realUsage;
}
- throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes");
+ throw new IOException("fake disk full at " + dir.getRecomputedActualSizeInBytes() + " bytes");
} else {
super.flushBuffer(src, len);
}
Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 515500)
+++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy)
@@ -25,175 +25,259 @@
"Venice has lots of canals" };
String[] text = { "Amsterdam", "Venice" };
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setUseCompoundFile(true);
- modifier.setMaxBufferedDeleteTerms(1);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- for (int i = 0; i < keywords.length; i++) {
- Document doc = new Document();
- doc.add(new Field("id", keywords[i], Field.Store.YES,
- Field.Index.UN_TOKENIZED));
- doc.add(new Field("country", unindexed[i], Field.Store.YES,
- Field.Index.NO));
- doc.add(new Field("contents", unstored[i], Field.Store.NO,
- Field.Index.TOKENIZED));
- doc
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setUseCompoundFile(true);
+ modifier.setMaxBufferedDeleteTerms(1);
+
+ for (int i = 0; i < keywords.length; i++) {
+ Document doc = new Document();
+ doc.add(new Field("id", keywords[i], Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ doc.add(new Field("country", unindexed[i], Field.Store.YES,
+ Field.Index.NO));
+ doc.add(new Field("contents", unstored[i], Field.Store.NO,
+ Field.Index.TOKENIZED));
+ doc
.add(new Field("city", text[i], Field.Store.YES,
- Field.Index.TOKENIZED));
- modifier.addDocument(doc);
- }
- modifier.optimize();
+ Field.Index.TOKENIZED));
+ modifier.addDocument(doc);
+ }
+ modifier.optimize();
- Term term = new Term("city", "Amsterdam");
- int hitCount = getHitCount(dir, term);
- assertEquals(1, hitCount);
- modifier.deleteDocuments(term);
- hitCount = getHitCount(dir, term);
- assertEquals(0, hitCount);
+ if (!autoCommit) {
+ modifier.close();
+ }
- modifier.close();
+ Term term = new Term("city", "Amsterdam");
+ int hitCount = getHitCount(dir, term);
+ assertEquals(1, hitCount);
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+ modifier.setUseCompoundFile(true);
+ }
+ modifier.deleteDocuments(term);
+ if (!autoCommit) {
+ modifier.close();
+ }
+ hitCount = getHitCount(dir, term);
+ assertEquals(0, hitCount);
+
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
// test when delete terms only apply to disk segments
public void testNonRAMDelete() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(2);
- modifier.setMaxBufferedDeleteTerms(2);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- int id = 0;
- int value = 100;
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
- for (int i = 0; i < 7; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.flush();
+ int id = 0;
+ int value = 100;
- assertEquals(0, modifier.getRamSegmentCount());
- assertTrue(0 < modifier.getSegmentCount());
+ for (int i = 0; i < 7; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.flush();
- IndexReader reader = IndexReader.open(dir);
- assertEquals(7, reader.numDocs());
- reader.close();
+ assertEquals(0, modifier.getRamSegmentCount());
+ assertTrue(0 < modifier.getSegmentCount());
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ if (!autoCommit) {
+ modifier.close();
+ }
- reader = IndexReader.open(dir);
- assertEquals(0, reader.numDocs());
- reader.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(7, reader.numDocs());
+ reader.close();
- modifier.close();
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
+ }
+
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+
+ if (!autoCommit) {
+ modifier.close();
+ }
+
+ reader = IndexReader.open(dir);
+ assertEquals(0, reader.numDocs());
+ reader.close();
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
// test when delete terms only apply to ram segments
public void testRAMDeletes() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(4);
- modifier.setMaxBufferedDeleteTerms(4);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(4);
+ modifier.setMaxBufferedDeleteTerms(4);
- int id = 0;
- int value = 100;
+ int id = 0;
+ int value = 100;
- addDoc(modifier, ++id, value);
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- addDoc(modifier, ++id, value);
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ addDoc(modifier, ++id, value);
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ addDoc(modifier, ++id, value);
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- assertEquals(2, modifier.getNumBufferedDeleteTerms());
- assertEquals(1, modifier.getBufferedDeleteTermsSize());
+ assertEquals(2, modifier.getNumBufferedDeleteTerms());
+ assertEquals(1, modifier.getBufferedDeleteTermsSize());
- addDoc(modifier, ++id, value);
- assertEquals(0, modifier.getSegmentCount());
- modifier.flush();
+ addDoc(modifier, ++id, value);
+ assertEquals(0, modifier.getSegmentCount());
+ modifier.flush();
- IndexReader reader = IndexReader.open(dir);
- assertEquals(1, reader.numDocs());
+ if (!autoCommit) {
+ modifier.close();
+ }
- int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
- assertEquals(1, hitCount);
- reader.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(1, reader.numDocs());
- modifier.close();
+ int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
+ assertEquals(1, hitCount);
+ reader.close();
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
// test when delete terms apply to both disk and ram segments
public void testBothDeletes() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(100);
- modifier.setMaxBufferedDeleteTerms(100);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- int id = 0;
- int value = 100;
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(100);
+ modifier.setMaxBufferedDeleteTerms(100);
- for (int i = 0; i < 5; i++) {
- addDoc(modifier, ++id, value);
- }
+ int id = 0;
+ int value = 100;
- value = 200;
- for (int i = 0; i < 5; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.flush();
+ for (int i = 0; i < 5; i++) {
+ addDoc(modifier, ++id, value);
+ }
- for (int i = 0; i < 5; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- modifier.flush();
+ value = 200;
+ for (int i = 0; i < 5; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.flush();
- IndexReader reader = IndexReader.open(dir);
- assertEquals(5, reader.numDocs());
+ for (int i = 0; i < 5; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- modifier.close();
+ modifier.flush();
+ if (!autoCommit) {
+ modifier.close();
+ }
+
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(5, reader.numDocs());
+ if (autoCommit) {
+ modifier.close();
+ }
+ }
}
// test that batched delete terms are flushed together
public void testBatchDeletes() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(2);
- modifier.setMaxBufferedDeleteTerms(2);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
- int id = 0;
- int value = 100;
+ int id = 0;
+ int value = 100;
- for (int i = 0; i < 7; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.flush();
+ for (int i = 0; i < 7; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.flush();
+ if (!autoCommit) {
+ modifier.close();
+ }
- IndexReader reader = IndexReader.open(dir);
- assertEquals(7, reader.numDocs());
- reader.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(7, reader.numDocs());
+ reader.close();
+
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer());
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
+ }
- id = 0;
- modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
- modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
+ id = 0;
+ modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
+ modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
- reader = IndexReader.open(dir);
- assertEquals(5, reader.numDocs());
- reader.close();
+ if (!autoCommit) {
+ modifier.close();
+ }
- Term[] terms = new Term[3];
- for (int i = 0; i < terms.length; i++) {
- terms[i] = new Term("id", String.valueOf(++id));
- }
- modifier.deleteDocuments(terms);
+ reader = IndexReader.open(dir);
+ assertEquals(5, reader.numDocs());
+ reader.close();
- reader = IndexReader.open(dir);
- assertEquals(2, reader.numDocs());
- reader.close();
+ Term[] terms = new Term[3];
+ for (int i = 0; i < terms.length; i++) {
+ terms[i] = new Term("id", String.valueOf(++id));
+ }
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer());
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
+ }
+ modifier.deleteDocuments(terms);
+ if (!autoCommit) {
+ modifier.close();
+ }
+ reader = IndexReader.open(dir);
+ assertEquals(2, reader.numDocs());
+ reader.close();
- modifier.close();
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
private void addDoc(IndexWriter modifier, int id, int value)
@@ -233,201 +317,203 @@
int START_COUNT = 157;
int END_COUNT = 144;
- // First build up a starting index:
- RAMDirectory startDir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(),
- true);
- for (int i = 0; i < 157; i++) {
- Document d = new Document();
- d.add(new Field("id", Integer.toString(i), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
- d.add(new Field("content", "aaa " + i, Field.Store.NO,
- Field.Index.TOKENIZED));
- writer.addDocument(d);
- }
- writer.close();
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- long diskUsage = startDir.sizeInBytes();
- long diskFree = diskUsage + 10;
+ // First build up a starting index:
+ RAMDirectory startDir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(startDir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ for (int i = 0; i < 157; i++) {
+ Document d = new Document();
+ d.add(new Field("id", Integer.toString(i), Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ d.add(new Field("content", "aaa " + i, Field.Store.NO,
+ Field.Index.TOKENIZED));
+ writer.addDocument(d);
+ }
+ writer.close();
- IOException err = null;
+ long diskUsage = startDir.sizeInBytes();
+ long diskFree = diskUsage + 10;
- boolean done = false;
+ IOException err = null;
- // Iterate w/ ever increasing free disk space:
- while (!done) {
- MockRAMDirectory dir = new MockRAMDirectory(startDir);
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), false);
+ boolean done = false;
- modifier.setMaxBufferedDocs(1000); // use flush or close
- modifier.setMaxBufferedDeleteTerms(1000); // use flush or close
+ // Iterate w/ ever increasing free disk space:
+ while (!done) {
+ MockRAMDirectory dir = new MockRAMDirectory(startDir);
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer());
- // For each disk size, first try to commit against
- // dir that will hit random IOExceptions & disk
- // full; after, give it infinite disk space & turn
- // off random IOExceptions & retry w/ same reader:
- boolean success = false;
+ modifier.setMaxBufferedDocs(1000); // use flush or close
+ modifier.setMaxBufferedDeleteTerms(1000); // use flush or close
- for (int x = 0; x < 2; x++) {
+ // For each disk size, first try to commit against
+ // dir that will hit random IOExceptions & disk
+ // full; after, give it infinite disk space & turn
+ // off random IOExceptions & retry w/ same reader:
+ boolean success = false;
- double rate = 0.1;
- double diskRatio = ((double)diskFree) / diskUsage;
- long thisDiskFree;
- String testName;
+ for (int x = 0; x < 2; x++) {
- if (0 == x) {
- thisDiskFree = diskFree;
- if (diskRatio >= 2.0) {
- rate /= 2;
- }
- if (diskRatio >= 4.0) {
- rate /= 2;
- }
- if (diskRatio >= 6.0) {
+ double rate = 0.1;
+ double diskRatio = ((double)diskFree) / diskUsage;
+ long thisDiskFree;
+ String testName;
+
+ if (0 == x) {
+ thisDiskFree = diskFree;
+ if (diskRatio >= 2.0) {
+ rate /= 2;
+ }
+ if (diskRatio >= 4.0) {
+ rate /= 2;
+ }
+ if (diskRatio >= 6.0) {
+ rate = 0.0;
+ }
+ if (debug) {
+ System.out.println("\ncycle: " + diskFree + " bytes");
+ }
+ testName = "disk full during reader.close() @ " + thisDiskFree
+ + " bytes";
+ } else {
+ thisDiskFree = 0;
rate = 0.0;
+ if (debug) {
+ System.out.println("\ncycle: same writer: unlimited disk space");
+ }
+ testName = "reader re-use after disk full";
}
- if (debug) {
- System.out.println("\ncycle: " + diskFree + " bytes");
+
+ dir.setMaxSizeInBytes(thisDiskFree);
+ dir.setRandomIOExceptionRate(rate, diskFree);
+
+ try {
+ if (0 == x) {
+ int docId = 12;
+ for (int i = 0; i < 13; i++) {
+ if (updates) {
+ Document d = new Document();
+ d.add(new Field("id", Integer.toString(i), Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ d.add(new Field("content", "bbb " + i, Field.Store.NO,
+ Field.Index.TOKENIZED));
+ modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
+ } else { // deletes
+ modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
+ // modifier.setNorm(docId, "contents", (float)2.0);
+ }
+ docId += 12;
+ }
+ }
+ modifier.close();
+ success = true;
+ if (0 == x) {
+ done = true;
+ }
}
- testName = "disk full during reader.close() @ " + thisDiskFree
- + " bytes";
- } else {
- thisDiskFree = 0;
- rate = 0.0;
- if (debug) {
- System.out.println("\ncycle: same writer: unlimited disk space");
+ catch (IOException e) {
+ if (debug) {
+ System.out.println(" hit IOException: " + e);
+ }
+ err = e;
+ if (1 == x) {
+ e.printStackTrace();
+ fail(testName + " hit IOException after disk space was freed up");
+ }
}
- testName = "reader re-use after disk full";
- }
- dir.setMaxSizeInBytes(thisDiskFree);
- dir.setRandomIOExceptionRate(rate, diskFree);
+ // Whether we succeeded or failed, check that all
+ // un-referenced files were in fact deleted (ie,
+ // we did not create garbage). Just create a
+ // new IndexFileDeleter, have it delete
+ // unreferenced files, then verify that in fact
+ // no files were deleted:
+ String[] startFiles = dir.list();
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
+ String[] endFiles = dir.list();
- try {
- if (0 == x) {
- int docId = 12;
- for (int i = 0; i < 13; i++) {
- if (updates) {
- Document d = new Document();
- d.add(new Field("id", Integer.toString(i), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
- d.add(new Field("content", "bbb " + i, Field.Store.NO,
- Field.Index.TOKENIZED));
- modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
- } else { // deletes
- modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
- // modifier.setNorm(docId, "contents", (float)2.0);
- }
- docId += 12;
+ Arrays.sort(startFiles);
+ Arrays.sort(endFiles);
+
+ // for(int i=0;i<startFiles.length;i++) {
+ // System.out.println(" startFiles: " + i + ": " + startFiles[i]);
+ // }
+
+ if (!Arrays.equals(startFiles, endFiles)) {
+ String successStr;
+ if (success) {
+ successStr = "success";
+ } else {
+ successStr = "IOException";
+ err.printStackTrace();
}
+ fail("reader.close() failed to delete unreferenced files after "
+ + successStr + " (" + diskFree + " bytes): before delete:\n "
+ + arrayToString(startFiles) + "\n after delete:\n "
+ + arrayToString(endFiles));
}
- modifier.close();
- success = true;
- if (0 == x) {
- done = true;
+
+ // Finally, verify index is not corrupt, and, if
+ // we succeeded, we see all docs changed, and if
+ // we failed, we see either all docs or no docs
+ // changed (transactional semantics):
+ IndexReader newReader = null;
+ try {
+ newReader = IndexReader.open(dir);
}
- }
- catch (IOException e) {
- if (debug) {
- System.out.println(" hit IOException: " + e);
+ catch (IOException e) {
+ e.printStackTrace();
+ fail(testName
+ + ":exception when creating IndexReader after disk full during close: "
+ + e);
}
- err = e;
- if (1 == x) {
+
+ IndexSearcher searcher = new IndexSearcher(newReader);
+ Hits hits = null;
+ try {
+ hits = searcher.search(new TermQuery(searchTerm));
+ }
+ catch (IOException e) {
e.printStackTrace();
- fail(testName + " hit IOException after disk space was freed up");
+ fail(testName + ": exception when searching: " + e);
}
- }
-
- // Whether we succeeded or failed, check that all
- // un-referenced files were in fact deleted (ie,
- // we did not create garbage). Just create a
- // new IndexFileDeleter, have it delete
- // unreferenced files, then verify that in fact
- // no files were deleted:
- String[] startFiles = dir.list();
- SegmentInfos infos = new SegmentInfos();
- infos.read(dir);
- IndexFileDeleter d = new IndexFileDeleter(infos, dir);
- d.findDeletableFiles();
- d.deleteFiles();
- String[] endFiles = dir.list();
-
- Arrays.sort(startFiles);
- Arrays.sort(endFiles);
-
- // for(int i=0;i<startFiles.length;i++) {
- // System.out.println(" startFiles: " + i + ": " + startFiles[i]);
- // }
-
- if (!Arrays.equals(startFiles, endFiles)) {
- String successStr;
+ int result2 = hits.length();
if (success) {
- successStr = "success";
+ if (result2 != END_COUNT) {
+ fail(testName
+ + ": method did not throw exception but hits.length for search on term 'aaa' is "
+ + result2 + " instead of expected " + END_COUNT);
+ }
} else {
- successStr = "IOException";
- err.printStackTrace();
+ // On hitting exception we still may have added
+ // all docs:
+ if (result2 != START_COUNT && result2 != END_COUNT) {
+ err.printStackTrace();
+ fail(testName
+ + ": method did throw exception but hits.length for search on term 'aaa' is "
+ + result2 + " instead of expected " + START_COUNT);
+ }
}
- fail("reader.close() failed to delete unreferenced files after "
- + successStr + " (" + diskFree + " bytes): before delete:\n "
- + arrayToString(startFiles) + "\n after delete:\n "
- + arrayToString(endFiles));
- }
- // Finally, verify index is not corrupt, and, if
- // we succeeded, we see all docs changed, and if
- // we failed, we see either all docs or no docs
- // changed (transactional semantics):
- IndexReader newReader = null;
- try {
- newReader = IndexReader.open(dir);
- }
- catch (IOException e) {
- e.printStackTrace();
- fail(testName
- + ":exception when creating IndexReader after disk full during close: "
- + e);
- }
+ searcher.close();
+ newReader.close();
- IndexSearcher searcher = new IndexSearcher(newReader);
- Hits hits = null;
- try {
- hits = searcher.search(new TermQuery(searchTerm));
- }
- catch (IOException e) {
- e.printStackTrace();
- fail(testName + ": exception when searching: " + e);
- }
- int result2 = hits.length();
- if (success) {
- if (result2 != END_COUNT) {
- fail(testName
- + ": method did not throw exception but hits.length for search on term 'aaa' is "
- + result2 + " instead of expected " + END_COUNT);
+ if (result2 == END_COUNT) {
+ break;
}
- } else {
- // On hitting exception we still may have added
- // all docs:
- if (result2 != START_COUNT && result2 != END_COUNT) {
- err.printStackTrace();
- fail(testName
- + ": method did throw exception but hits.length for search on term 'aaa' is "
- + result2 + " instead of expected " + START_COUNT);
- }
}
- searcher.close();
- newReader.close();
+ dir.close();
- if (result2 == END_COUNT) {
- break;
- }
+ // Try again with 10 more bytes of free space:
+ diskFree += 10;
}
-
- dir.close();
-
- // Try again with 10 more bytes of free space:
- diskFree += 10;
}
}
Index: src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexReader.java (revision 515500)
+++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -802,9 +802,7 @@
String[] startFiles = dir.list();
SegmentInfos infos = new SegmentInfos();
infos.read(dir);
- IndexFileDeleter d = new IndexFileDeleter(infos, dir);
- d.findDeletableFiles();
- d.deleteFiles();
+ IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
String[] endFiles = dir.list();
Arrays.sort(startFiles);
Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 515500)
+++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -71,7 +71,7 @@
reader.close();
// optimize the index and check that the new doc count is correct
- writer = new IndexWriter(dir, new WhitespaceAnalyzer());
+ writer = new IndexWriter(dir, true, new WhitespaceAnalyzer());
writer.optimize();
assertEquals(60, writer.docCount());
writer.close();
@@ -163,7 +163,7 @@
// addIndexes will certainly run out of space &
// fail. Verify that when this happens, index is
// not corrupt and index in fact has added no
- // documents. Then, we increase disk space by 1000
+ // documents. Then, we increase disk space by 2000
// bytes each iteration. At some point there is
// enough free disk space and addIndexes should
// succeed and index should show all documents were
@@ -178,11 +178,14 @@
startDiskUsage += startDir.fileLength(files[i]);
}
- for(int method=0;method<3;method++) {
+ for(int iter=0;iter<6;iter++) {
// Start with 100 bytes more than we are currently using:
long diskFree = diskUsage+100;
+ boolean autoCommit = iter % 2 == 0;
+ int method = iter/2;
+
boolean success = false;
boolean done = false;
@@ -195,7 +198,7 @@
methodName = "addIndexesNoOptimize(Directory[])";
}
- String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes";
+ String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes with autoCommit = " + autoCommit;
int cycleCount = 0;
@@ -205,7 +208,7 @@
// Make a new dir that will enforce disk usage:
MockRAMDirectory dir = new MockRAMDirectory(startDir);
- writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
IOException err = null;
for(int x=0;x<2;x++) {
@@ -285,38 +288,27 @@
}
}
- // Whether we succeeded or failed, check that all
- // un-referenced files were in fact deleted (ie,
- // we did not create garbage). Just create a
- // new IndexFileDeleter, have it delete
- // unreferenced files, then verify that in fact
- // no files were deleted:
- String[] startFiles = dir.list();
- SegmentInfos infos = new SegmentInfos();
- infos.read(dir);
- IndexFileDeleter d = new IndexFileDeleter(infos, dir);
- d.findDeletableFiles();
- d.deleteFiles();
- String[] endFiles = dir.list();
+ if (autoCommit) {
- Arrays.sort(startFiles);
- Arrays.sort(endFiles);
+ // Whether we succeeded or failed, check that
+ // all un-referenced files were in fact
+ // deleted (ie, we did not create garbage).
+ // Only check this when autoCommit is true:
+ // when it's false, it's expected that there
+ // are unreferenced files (ie they won't be
+ // referenced until the "commit on close").
+ // Just create a new IndexFileDeleter, have it
+ // delete unreferenced files, then verify that
+ // in fact no files were deleted:
- /*
- for(int i=0;i<startFiles.length;i++) {
- System.out.println(" " + i + ": " + startFiles[i]);
- }
- */
-
- if (!Arrays.equals(startFiles, endFiles)) {
String successStr;
if (success) {
successStr = "success";
} else {
successStr = "IOException";
- err.printStackTrace();
}
- fail(methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
+ String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)";
+ assertNoUnreferencedFiles(dir, message);
}
if (debug) {
@@ -335,8 +327,10 @@
}
int result = reader.docFreq(searchTerm);
if (success) {
- if (result != END_COUNT) {
+ if (autoCommit && result != END_COUNT) {
fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
+ } else if (!autoCommit && result != START_COUNT) {
+ fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]");
}
} else {
// On hitting exception we still may have added
@@ -374,31 +368,107 @@
System.out.println(" count is " + result);
}
- if (result == END_COUNT) {
+ if (done || result == END_COUNT) {
break;
}
}
- // Javadocs state that temp free Directory space
- // required is at most 2X total input size of
- // indices so let's make sure:
- assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName +
- ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " +
- "starting disk usage = " + startDiskUsage + " bytes; " +
- "input index disk usage = " + inputDiskUsage + " bytes",
- (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage));
+ if (debug) {
+ System.out.println(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.getMaxUsedSizeInBytes());
+ }
+ if (done) {
+ // Javadocs state that temp free Directory space
+ // required is at most 2X total input size of
+ // indices so let's make sure:
+ assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName +
+ ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " +
+ "starting disk usage = " + startDiskUsage + " bytes; " +
+ "input index disk usage = " + inputDiskUsage + " bytes",
+ (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage));
+ }
+
writer.close();
dir.close();
- // Try again with 1000 more bytes of free space:
- diskFree += 1000;
+ // Try again with 2000 more bytes of free space:
+ diskFree += 2000;
}
}
startDir.close();
}
+ /*
+ * Make sure IndexWriter cleans up on hitting a disk
+ * full exception in addDocument.
+ */
+ public void testAddDocumentOnDiskFull() throws IOException {
+
+ for(int pass=0;pass<3;pass++) {
+ boolean autoCommit = pass == 0;
+ boolean doAbort = pass == 2;
+ long diskFree = 200;
+ while(true) {
+ MockRAMDirectory dir = new MockRAMDirectory();
+ dir.setMaxSizeInBytes(diskFree);
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
+ boolean hitError = false;
+ try {
+ for(int i=0;i<200;i++) {
+ addDoc(writer);
+ }
+ } catch (IOException e) {
+ // e.printStackTrace();
+ hitError = true;
+ }
+
+ if (hitError) {
+ if (doAbort) {
+ writer.abort();
+ } else {
+ try {
+ writer.close();
+ } catch (IOException e) {
+ // e.printStackTrace();
+ dir.setMaxSizeInBytes(0);
+ writer.close();
+ }
+ }
+
+ assertNoUnreferencedFiles(dir, "after disk full during addDocument with autoCommit=" + autoCommit);
+
+ // Make sure reader can open the index:
+ IndexReader.open(dir).close();
+
+ dir.close();
+
+ // Now try again w/ more space:
+ diskFree += 500;
+ } else {
+ dir.close();
+ break;
+ }
+ }
+ }
+
+ }
+
+ public void assertNoUnreferencedFiles(Directory dir, String message) throws IOException {
+ String[] startFiles = dir.list();
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
+ String[] endFiles = dir.list();
+
+ Arrays.sort(startFiles);
+ Arrays.sort(endFiles);
+
+ if (!Arrays.equals(startFiles, endFiles)) {
+ fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
+ }
+ }
+
/**
* Make sure optimize doesn't use any more than 1X
* starting index size as its temporary free space
@@ -694,6 +764,205 @@
}
}
+ /*
+ * Simple test for "commit on close": open writer with
+ * autoCommit=false, so it will only commit on close,
+ * then add a bunch of docs, making sure reader does not
+ * see these docs until writer is closed.
+ */
+ public void testCommitOnClose() throws IOException {
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ for (int i = 0; i < 14; i++) {
+ addDoc(writer);
+ }
+ writer.close();
+
+ Term searchTerm = new Term("content", "aaa");
+ IndexSearcher searcher = new IndexSearcher(dir);
+ Hits hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("first number of hits", 14, hits.length());
+ searcher.close();
+
+ IndexReader reader = IndexReader.open(dir);
+
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer());
+ for(int i=0;i<3;i++) {
+ for(int j=0;j<11;j++) {
+ addDoc(writer);
+ }
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("reader incorrectly sees changes from writer with autoCommit disabled", 14, hits.length());
+ searcher.close();
+ assertTrue("reader should have still been current", reader.isCurrent());
+ }
+
+ // Now, close the writer:
+ writer.close();
+ assertFalse("reader should not be current now", reader.isCurrent());
+
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("reader did not see changes after writer was closed", 47, hits.length());
+ searcher.close();
+ }
+
+ /*
+ * Simple test for "commit on close": open writer with
+ * autoCommit=false, so it will only commit on close,
+ * then add a bunch of docs, making sure reader does not
+ * see them until writer has closed. Then instead of
+ * closing the writer, call abort and verify reader sees
+ * nothing was added. Then verify we can open the index
+ * and add docs to it.
+ */
+ public void testCommitOnCloseAbort() throws IOException {
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ for (int i = 0; i < 14; i++) {
+ addDoc(writer);
+ }
+ writer.close();
+
+ Term searchTerm = new Term("content", "aaa");
+ IndexSearcher searcher = new IndexSearcher(dir);
+ Hits hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("first number of hits", 14, hits.length());
+ searcher.close();
+
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
+ for(int j=0;j<17;j++) {
+ addDoc(writer);
+ }
+ // Delete all docs:
+ writer.deleteDocuments(searchTerm);
+
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("reader incorrectly sees changes from writer with autoCommit disabled", 14, hits.length());
+ searcher.close();
+
+ // Now, close the writer:
+ writer.abort();
+
+ assertNoUnreferencedFiles(dir, "unreferenced files remain after abort()");
+
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("saw changes after writer.abort", 14, hits.length());
+ searcher.close();
+
+ // Now make sure we can re-open the index, add docs,
+ // and all is good:
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
+ for(int i=0;i<12;i++) {
+ for(int j=0;j<17;j++) {
+ addDoc(writer);
+ }
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("reader incorrectly sees changes from writer with autoCommit disabled", 14, hits.length());
+ searcher.close();
+ }
+
+ writer.close();
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(searchTerm));
+ assertEquals("didn't see changes after close", 218, hits.length());
+ searcher.close();
+
+ dir.close();
+ }
+
+ /*
+ * Verify that a writer with "commit on close" indeed
+ * cleans up the temp segments created after opening
+ * that are not referenced by the starting segments
+ * file. We check this by using MockRAMDirectory to
+ * measure max temp disk space used.
+ */
+ public void testCommitOnCloseDiskUsage() throws IOException {
+ MockRAMDirectory dir = new MockRAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ for(int j=0;j<30;j++) {
+ addDocWithIndex(writer, j);
+ }
+ writer.close();
+ dir.resetMaxUsedSizeInBytes();
+
+ long startDiskUsage = dir.getMaxUsedSizeInBytes();
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
+ for(int j=0;j<1470;j++) {
+ addDocWithIndex(writer, j);
+ }
+ long midDiskUsage = dir.getMaxUsedSizeInBytes();
+ dir.resetMaxUsedSizeInBytes();
+ writer.optimize();
+ writer.close();
+ long endDiskUsage = dir.getMaxUsedSizeInBytes();
+
+ // Ending index is 50X as large as starting index; due
+ // to 2X disk usage normally we allow 100X max
+ // transient usage. If something is wrong w/ deleter
+ // and it doesn't delete intermediate segments then it
+ // will exceed this 100X:
+ // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
+ assertTrue("writer used to much space while adding documents when autoCommit=false",
+ midDiskUsage < 100*startDiskUsage);
+ assertTrue("writer used to much space after close when autoCommit=false",
+ endDiskUsage < 100*startDiskUsage);
+ }
+
+
+ /*
+ * Verify that calling optimize when writer is open for
+ * "commit on close" works correctly both for abort()
+ * and close().
+ */
+ public void testCommitOnCloseOptimize() throws IOException {
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ for(int j=0;j<17;j++) {
+ addDocWithIndex(writer, j);
+ }
+ writer.close();
+
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
+ writer.optimize();
+
+    // Open a reader before closing (committing) the writer:
+ IndexReader reader = IndexReader.open(dir);
+
+ // Reader should see index as unoptimized at this
+ // point:
+ assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized());
+ reader.close();
+
+ // Abort the writer:
+ writer.abort();
+ assertNoUnreferencedFiles(dir, "aborted writer after optimize");
+
+ // Open a reader after aborting writer:
+ reader = IndexReader.open(dir);
+
+ // Reader should still see index as unoptimized:
+ assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized());
+ reader.close();
+
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
+ writer.optimize();
+ writer.close();
+ assertNoUnreferencedFiles(dir, "aborted writer after optimize");
+
+ // Open a reader after aborting writer:
+ reader = IndexReader.open(dir);
+
+ // Reader should still see index as unoptimized:
+ assertTrue("Reader incorrectly sees that the index is unoptimized", reader.isOptimized());
+ reader.close();
+ }
+
// Make sure that a Directory implementation that does
// not use LockFactory at all (ie overrides makeLock and
// implements its own private locking) works OK. This
Index: src/test/org/apache/lucene/index/TestIndexFileDeleter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexFileDeleter.java (revision 515500)
+++ src/test/org/apache/lucene/index/TestIndexFileDeleter.java (working copy)
@@ -173,6 +173,8 @@
out.writeBytes(b, len);
remainder -= len;
}
+ in.close();
+ out.close();
}
private void addDoc(IndexWriter writer, int id) throws IOException
Index: src/test/org/apache/lucene/index/TestDeletionPolicy.java
===================================================================
--- src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 0)
+++ src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 0)
@@ -0,0 +1,618 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import java.util.List;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.HashSet;
+
+/*
+  Verify that custom IndexDeletionPolicy implementations
+  correctly control when prior commits are deleted.
+*/
+
+public class TestDeletionPolicy extends TestCase
+{
+ private void verifyCommitOrder(List commits) {
+ long last = SegmentInfos.generationFromSegmentsFileName(((IndexCommitPoint) commits.get(0)).getSegmentsFileName());
+ for(int i=1;i<commits.size();i++) {
+ long now = SegmentInfos.generationFromSegmentsFileName(((IndexCommitPoint) commits.get(i)).getSegmentsFileName());
+ assertTrue("SegmentInfos commits are out-of-order", now > last);
+ last = now;
+ }
+ }
+
+ class KeepAllDeletionPolicy implements IndexDeletionPolicy {
+ int numOnInit;
+ int numOnCommit;
+ public void onInit(List commits) {
+ verifyCommitOrder(commits);
+ numOnInit++;
+ }
+ public void onCommit(List commits) {
+ verifyCommitOrder(commits);
+ numOnCommit++;
+ }
+ }
+
+ /**
+ * This is useful for adding to a big index w/ autoCommit
+ * false when you know readers are not using it.
+ */
+ class KeepNoneOnInitDeletionPolicy implements IndexDeletionPolicy {
+ int numOnInit;
+ int numOnCommit;
+ public void onInit(List commits) {
+ verifyCommitOrder(commits);
+ numOnInit++;
+ // On init, delete all commit points:
+ Iterator it = commits.iterator();
+ while(it.hasNext()) {
+ ((IndexCommitPoint) it.next()).delete();
+ }
+ }
+ public void onCommit(List commits) {
+ verifyCommitOrder(commits);
+ int size = commits.size();
+ // Delete all but last one:
+ for(int i=0;i<size-1;i++) {
+ ((IndexCommitPoint) commits.get(i)).delete();
+ }
+ numOnCommit++;
+ }
+ }
+
+ class KeepLastNDeletionPolicy implements IndexDeletionPolicy {
+ int numOnInit;
+ int numOnCommit;
+ int numToKeep;
+ int numDelete;
+ Set seen = new HashSet();
+
+ public KeepLastNDeletionPolicy(int numToKeep) {
+ this.numToKeep = numToKeep;
+ }
+
+ public void onInit(List commits) {
+ verifyCommitOrder(commits);
+ numOnInit++;
+ // do no deletions on init
+ doDeletes(commits, false);
+ }
+
+ public void onCommit(List commits) {
+ verifyCommitOrder(commits);
+ doDeletes(commits, true);
+ }
+
+ private void doDeletes(List commits, boolean isCommit) {
+
+ // Assert that we really are only called for each new
+ // commit:
+ if (isCommit) {
+ String fileName = ((IndexCommitPoint) commits.get(commits.size()-1)).getSegmentsFileName();
+ if (seen.contains(fileName)) {
+ throw new RuntimeException("onCommit was called twice on the same commit point: " + fileName);
+ }
+ seen.add(fileName);
+ numOnCommit++;
+ }
+ int size = commits.size();
+ for(int i=0;i<size-numToKeep;i++) {
+ ((IndexCommitPoint) commits.get(i)).delete();
+ numDelete++;
+ }
+ }
+ }
+
+ /*
+ * Delete a commit only when it has been obsoleted by N
+ * seconds.
+ */
+ class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy {
+
+ Directory dir;
+ double expirationTimeSeconds;
+ int numDelete;
+
+ public ExpirationTimeDeletionPolicy(Directory dir, double seconds) {
+ this.dir = dir;
+ this.expirationTimeSeconds = seconds;
+ }
+
+ public void onInit(List commits) throws IOException {
+ verifyCommitOrder(commits);
+ onCommit(commits);
+ }
+
+ public void onCommit(List commits) throws IOException {
+ verifyCommitOrder(commits);
+
+ IndexCommitPoint lastCommit = (IndexCommitPoint) commits.get(commits.size()-1);
+
+ // Any commit older than expireTime should be deleted:
+ double expireTime = dir.fileModified(lastCommit.getSegmentsFileName())/1000.0 - expirationTimeSeconds;
+
+ Iterator it = commits.iterator();
+
+ while(it.hasNext()) {
+ IndexCommitPoint commit = (IndexCommitPoint) it.next();
+ double modTime = dir.fileModified(commit.getSegmentsFileName())/1000.0;
+ if (commit != lastCommit && modTime < expireTime) {
+ commit.delete();
+ numDelete += 1;
+ }
+ }
+ }
+ }
+
+ /*
+ * Test "by time expiration" deletion policy:
+ */
+ public void testExpirationTimeDeletionPolicy() throws IOException, InterruptedException {
+
+ final double SECONDS = 2.0;
+
+ boolean autoCommit = false;
+ boolean useCompoundFile = true;
+
+ Directory dir = new RAMDirectory();
+ ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(dir, SECONDS);
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ writer.close();
+
+ for(int i=0;i<7;i++) {
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ for(int j=0;j<17;j++) {
+ addDoc(writer);
+ }
+ writer.close();
+
+ // Make sure to sleep long enough so that some commit
+ // points will be deleted:
+ Thread.sleep((int) (1000.0*(SECONDS/5.0)));
+ }
+
+ // First, make sure the policy in fact deleted something:
+ assertTrue("no commits were deleted", policy.numDelete > 0);
+
+ // Then simplistic check: just verify that the
+ // segments_N's that still exist are in fact within SECONDS
+ // seconds of the last one's mod time, and, that I can
+ // open a reader on each:
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+
+ String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ gen);
+ long newestModTime = dir.fileModified(fileName);
+
+ while(gen > 0) {
+ try {
+ IndexReader reader = IndexReader.open(dir);
+ reader.close();
+ fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ gen);
+ long modTime = dir.fileModified(fileName);
+ assertTrue("commit point was older than " + SECONDS + " seconds but did not get deleted", newestModTime - modTime < (SECONDS*1000));
+ } catch (IOException e) {
+ // OK
+ break;
+ }
+
+ dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
+ gen--;
+ }
+
+ dir.close();
+ }
+
+ /*
+ * Test a silly deletion policy that keeps all commits around.
+ */
+ public void testKeepAllDeletionPolicy() throws IOException {
+
+ for(int pass=0;pass<4;pass++) {
+
+ boolean autoCommit = pass < 2;
+ boolean useCompoundFile = (pass % 2) > 0;
+
+ KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy();
+
+ Directory dir = new RAMDirectory();
+
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ for(int i=0;i<107;i++) {
+ addDoc(writer);
+ }
+ writer.close();
+
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ writer.optimize();
+ writer.close();
+
+ assertEquals(2, policy.numOnInit);
+ if (autoCommit) {
+ assertTrue(policy.numOnCommit > 2);
+ } else {
+ // If we are not auto committing then there should
+ // be exactly 2 commits (one per close above):
+ assertEquals(2, policy.numOnCommit);
+ }
+
+ // Simplistic check: just verify all segments_N's still
+ // exist, and, I can open a reader on each:
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+ while(gen > 0) {
+ IndexReader reader = IndexReader.open(dir);
+ reader.close();
+ dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
+ gen--;
+
+ if (gen > 0) {
+          // Now that we've removed a commit point, which
+          // should have orphaned at least one index file,
+          // open & close a writer and assert that it
+          // actually removed something:
+ int preCount = dir.list().length;
+ writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, policy);
+ writer.close();
+ int postCount = dir.list().length;
+ assertTrue(postCount < preCount);
+ }
+ }
+
+ dir.close();
+ }
+ }
+
+ /* Test keeping NO commit points. This is a viable and
+ * useful case eg where you want to build a big index with
+ * autoCommit false and you know there are no readers.
+ */
+ public void testKeepNoneOnInitDeletionPolicy() throws IOException {
+
+ for(int pass=0;pass<4;pass++) {
+
+ boolean autoCommit = pass < 2;
+ boolean useCompoundFile = (pass % 2) > 0;
+
+ KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy();
+
+ Directory dir = new RAMDirectory();
+
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ for(int i=0;i<107;i++) {
+ addDoc(writer);
+ }
+ writer.close();
+
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ writer.optimize();
+ writer.close();
+
+ assertEquals(2, policy.numOnInit);
+ if (autoCommit) {
+ assertTrue(policy.numOnCommit > 2);
+ } else {
+ // If we are not auto committing then there should
+ // be exactly 2 commits (one per close above):
+ assertEquals(2, policy.numOnCommit);
+ }
+
+ // Simplistic check: just verify the index is in fact
+ // readable:
+ IndexReader reader = IndexReader.open(dir);
+ reader.close();
+
+ dir.close();
+ }
+ }
+
+ /*
+ * Test a deletion policy that keeps last N commits.
+ */
+ public void testKeepLastNDeletionPolicy() throws IOException {
+
+ final int N = 5;
+
+ for(int pass=0;pass<4;pass++) {
+
+ boolean autoCommit = pass < 2;
+ boolean useCompoundFile = (pass % 2) > 0;
+
+ Directory dir = new RAMDirectory();
+
+ KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
+
+ for(int j=0;j<N+1;j++) {
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ for(int i=0;i<17;i++) {
+ addDoc(writer);
+ }
+ writer.optimize();
+ writer.close();
+ }
+
+ assertTrue(policy.numDelete > 0);
+ assertEquals(N+1, policy.numOnInit);
+ if (autoCommit) {
+ assertTrue(policy.numOnCommit > 1);
+ } else {
+ assertEquals(N+1, policy.numOnCommit);
+ }
+
+ // Simplistic check: just verify only the past N segments_N's still
+ // exist, and, I can open a reader on each:
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+ for(int i=0;i<N+1;i++) {
+ try {
+ IndexReader reader = IndexReader.open(dir);
+ reader.close();
+ if (i == N) {
+ fail("should have failed on commits prior to last " + N);
+ }
+ } catch (IOException e) {
+ if (i != N) {
+ throw e;
+ }
+ }
+ if (i < N) {
+ dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
+ }
+ gen--;
+ }
+
+ dir.close();
+ }
+ }
+
+ /*
+ * Test a deletion policy that keeps last N commits
+ * around, with reader doing deletes.
+ */
+ public void testKeepLastNDeletionPolicyWithReader() throws IOException {
+
+ final int N = 10;
+
+ for(int pass=0;pass<4;pass++) {
+
+ boolean autoCommit = pass < 2;
+ boolean useCompoundFile = (pass % 2) > 0;
+
+ KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
+
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ writer.close();
+ Term searchTerm = new Term("content", "aaa");
+ Query query = new TermQuery(searchTerm);
+
+ for(int i=0;i<N+1;i++) {
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ for(int j=0;j<17;j++) {
+ addDoc(writer);
+ }
+ // this is a commit when autoCommit=false:
+ writer.close();
+ IndexReader reader = IndexReader.open(dir, policy);
+ reader.deleteDocument(3*i+1);
+ reader.setNorm(4*i+1, "content", 2.0F);
+ IndexSearcher searcher = new IndexSearcher(reader);
+ Hits hits = searcher.search(query);
+ assertEquals(16*(1+i), hits.length());
+ // this is a commit when autoCommit=false:
+ reader.close();
+ searcher.close();
+ }
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ writer.optimize();
+ // this is a commit when autoCommit=false:
+ writer.close();
+
+ assertEquals(2*(N+2), policy.numOnInit);
+ if (autoCommit) {
+ assertTrue(policy.numOnCommit > 2*(N+2)-1);
+ } else {
+ assertEquals(2*(N+2)-1, policy.numOnCommit);
+ }
+
+ IndexSearcher searcher = new IndexSearcher(dir);
+ Hits hits = searcher.search(query);
+ assertEquals(176, hits.length());
+
+ // Simplistic check: just verify only the past N segments_N's still
+ // exist, and, I can open a reader on each:
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+
+ int expectedCount = 176;
+
+ for(int i=0;i<N+1;i++) {
+ try {
+ IndexReader reader = IndexReader.open(dir);
+
+ // Work backwards in commits on what the expected
+ // count should be. Only check this in the
+ // autoCommit false case:
+ if (!autoCommit) {
+ searcher = new IndexSearcher(reader);
+ hits = searcher.search(query);
+ if (i > 1) {
+ if (i % 2 == 0) {
+ expectedCount += 1;
+ } else {
+ expectedCount -= 17;
+ }
+ }
+ assertEquals(expectedCount, hits.length());
+ searcher.close();
+ }
+ reader.close();
+ if (i == N) {
+ fail("should have failed on commits before last 5");
+ }
+ } catch (IOException e) {
+ if (i != N) {
+ throw e;
+ }
+ }
+ if (i < N) {
+ dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
+ }
+ gen--;
+ }
+
+ dir.close();
+ }
+ }
+
+ /*
+ * Test a deletion policy that keeps last N commits
+ * around, through creates.
+ */
+ public void testKeepLastNDeletionPolicyWithCreates() throws IOException {
+
+ final int N = 10;
+
+ for(int pass=0;pass<4;pass++) {
+
+ boolean autoCommit = pass < 2;
+ boolean useCompoundFile = (pass % 2) > 0;
+
+ KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);
+
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ writer.close();
+ Term searchTerm = new Term("content", "aaa");
+ Query query = new TermQuery(searchTerm);
+
+ for(int i=0;i<N+1;i++) {
+
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
+ writer.setUseCompoundFile(useCompoundFile);
+ for(int j=0;j<17;j++) {
+ addDoc(writer);
+ }
+ // this is a commit when autoCommit=false:
+ writer.close();
+ IndexReader reader = IndexReader.open(dir, policy);
+ reader.deleteDocument(3);
+ reader.setNorm(5, "content", 2.0F);
+ IndexSearcher searcher = new IndexSearcher(reader);
+ Hits hits = searcher.search(query);
+ assertEquals(16, hits.length());
+ // this is a commit when autoCommit=false:
+ reader.close();
+ searcher.close();
+
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
+ // This will not commit: there are no changes
+ // pending because we opened for "create":
+ writer.close();
+ }
+
+ assertEquals(1+3*(N+1), policy.numOnInit);
+ if (autoCommit) {
+ assertTrue(policy.numOnCommit > 3*(N+1)-1);
+ } else {
+ assertEquals(2*(N+1), policy.numOnCommit);
+ }
+
+ IndexSearcher searcher = new IndexSearcher(dir);
+ Hits hits = searcher.search(query);
+ assertEquals(0, hits.length());
+
+ // Simplistic check: just verify only the past N segments_N's still
+ // exist, and, I can open a reader on each:
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+
+ int expectedCount = 0;
+
+ for(int i=0;i<N+1;i++) {
+ try {
+ IndexReader reader = IndexReader.open(dir);
+
+ // Work backwards in commits on what the expected
+ // count should be. Only check this in the
+ // autoCommit false case:
+ if (!autoCommit) {
+ searcher = new IndexSearcher(reader);
+ hits = searcher.search(query);
+ assertEquals(expectedCount, hits.length());
+ searcher.close();
+ if (expectedCount == 0) {
+ expectedCount = 16;
+ } else if (expectedCount == 16) {
+ expectedCount = 17;
+ } else if (expectedCount == 17) {
+ expectedCount = 0;
+ }
+ }
+ reader.close();
+ if (i == N) {
+ fail("should have failed on commits before last " + N);
+ }
+ } catch (IOException e) {
+ if (i != N) {
+ throw e;
+ }
+ }
+ if (i < N) {
+ dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
+ }
+ gen--;
+ }
+
+ dir.close();
+ }
+ }
+
+ private void addDoc(IndexWriter writer) throws IOException
+ {
+ Document doc = new Document();
+ doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
+ writer.addDocument(doc);
+ }
+}
Property changes on: src/test/org/apache/lucene/index/TestDeletionPolicy.java
___________________________________________________________________
Name: svn:eol-style
+ native
Index: src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
===================================================================
--- src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 515500)
+++ src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy)
@@ -21,7 +21,7 @@
import java.util.zip.*;
/*
- Verify we can read the pre-XXX file format, do searches
+ Verify we can read the pre-2.1 file format, do searches
against it, and add documents to it.
*/
@@ -104,8 +104,12 @@
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
- changeIndexNoAdds(oldNames[i]);
+ changeIndexNoAdds(oldNames[i], true);
rmDir(oldNames[i]);
+
+ unzip(dirName, oldNames[i]);
+ changeIndexNoAdds(oldNames[i], false);
+ rmDir(oldNames[i]);
}
}
@@ -114,8 +118,12 @@
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
- changeIndexWithAdds(oldNames[i]);
+ changeIndexWithAdds(oldNames[i], true);
rmDir(oldNames[i]);
+
+ unzip(dirName, oldNames[i]);
+ changeIndexWithAdds(oldNames[i], false);
+ rmDir(oldNames[i]);
}
}
@@ -141,13 +149,14 @@
/* Open pre-lockless index, add docs, do a delete &
* setNorm, and search */
- public void changeIndexWithAdds(String dirName) throws IOException {
+ public void changeIndexWithAdds(String dirName, boolean autoCommit) throws IOException {
dirName = fullDir(dirName);
Directory dir = FSDirectory.getDirectory(dirName);
+
// open writer
- IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
// add 10 docs
for(int i=0;i<10;i++) {
@@ -166,7 +175,7 @@
assertEquals("wrong first document", "21", d.get("id"));
searcher.close();
- // make sure we can do another delete & another setNorm against this
+ // make sure we can do delete & setNorm against this
// pre-lockless segment:
IndexReader reader = IndexReader.open(dir);
Term searchTerm = new Term("id", "6");
@@ -175,7 +184,7 @@
reader.setNorm(22, "content", (float) 2.0);
reader.close();
- // make sure 2nd delete & 2nd norm "took":
+ // make sure they "took":
searcher = new IndexSearcher(dir);
hits = searcher.search(new TermQuery(new Term("content", "aaa")));
assertEquals("wrong number of hits", 43, hits.length());
@@ -184,7 +193,7 @@
searcher.close();
// optimize
- writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+ writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
writer.optimize();
writer.close();
@@ -200,7 +209,7 @@
/* Open pre-lockless index, add docs, do a delete &
* setNorm, and search */
- public void changeIndexNoAdds(String dirName) throws IOException {
+ public void changeIndexNoAdds(String dirName, boolean autoCommit) throws IOException {
dirName = fullDir(dirName);
@@ -214,7 +223,7 @@
assertEquals("wrong first document", "21", d.get("id"));
searcher.close();
- // make sure we can do another delete & another setNorm against this
+ // make sure we can do a delete & setNorm against this
// pre-lockless segment:
IndexReader reader = IndexReader.open(dir);
Term searchTerm = new Term("id", "6");
@@ -223,7 +232,7 @@
reader.setNorm(22, "content", (float) 2.0);
reader.close();
- // make sure 2nd delete & 2nd norm "took":
+ // make sure they "took":
searcher = new IndexSearcher(dir);
hits = searcher.search(new TermQuery(new Term("content", "aaa")));
assertEquals("wrong number of hits", 33, hits.length());
@@ -232,7 +241,7 @@
searcher.close();
// optimize
- IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
writer.optimize();
writer.close();
@@ -273,66 +282,78 @@
/* Verifies that the expected file names were produced */
- // disable until hardcoded file names are fixes:
public void testExactFileNames() throws IOException {
- String outputDir = "lucene.backwardscompat0.index";
- Directory dir = FSDirectory.getDirectory(fullDir(outputDir));
- IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
- for(int i=0;i<35;i++) {
- addDoc(writer, i);
- }
- assertEquals("wrong doc count", 35, writer.docCount());
- writer.close();
+ for(int pass=0;pass<2;pass++) {
- // Delete one doc so we get a .del file:
- IndexReader reader = IndexReader.open(dir);
- Term searchTerm = new Term("id", "7");
- int delCount = reader.deleteDocuments(searchTerm);
- assertEquals("didn't delete the right number of documents", 1, delCount);
+ String outputDir = "lucene.backwardscompat0.index";
- // Set one norm so we get a .s0 file:
- reader.setNorm(21, "content", (float) 1.5);
- reader.close();
+ try {
+ Directory dir = FSDirectory.getDirectory(fullDir(outputDir));
- // The numbering of fields can vary depending on which
- // JRE is in use. On some JREs we see content bound to
- // field 0; on others, field 1. So, here we have to
- // figure out which field number corresponds to
- // "content", and then set our expected file names below
- // accordingly:
- CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
- FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
- int contentFieldIndex = -1;
- for(int i=0;i<fieldInfos.size();i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
- if (fi.name.equals("content")) {
- contentFieldIndex = i;
- break;
- }
- }
- cfsReader.close();
- assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
+ boolean autoCommit = 0 == pass;
+
+ IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
+ //IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ for(int i=0;i<35;i++) {
+ addDoc(writer, i);
+ }
+ assertEquals("wrong doc count", 35, writer.docCount());
+ writer.close();
- // Now verify file names:
- String[] expected = {"_0.cfs",
- "_0_1.del",
- "_1.cfs",
- "_2.cfs",
- "_2_1.s" + contentFieldIndex,
- "_3.cfs",
- "segments_a",
- "segments.gen"};
+ // Delete one doc so we get a .del file:
+ IndexReader reader = IndexReader.open(dir);
+ Term searchTerm = new Term("id", "7");
+ int delCount = reader.deleteDocuments(searchTerm);
+ assertEquals("didn't delete the right number of documents", 1, delCount);
- String[] actual = dir.list();
- Arrays.sort(expected);
- Arrays.sort(actual);
- if (!Arrays.equals(expected, actual)) {
- fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual));
+ // Set one norm so we get a .s0 file:
+ reader.setNorm(21, "content", (float) 1.5);
+ reader.close();
+
+ // The numbering of fields can vary depending on which
+ // JRE is in use. On some JREs we see content bound to
+ // field 0; on others, field 1. So, here we have to
+ // figure out which field number corresponds to
+ // "content", and then set our expected file names below
+ // accordingly:
+ CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
+ FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
+ int contentFieldIndex = -1;
+ for(int i=0;i<fieldInfos.size();i++) {
+ FieldInfo fi = fieldInfos.fieldInfo(i);
+ if (fi.name.equals("content")) {
+ contentFieldIndex = i;
+ break;
+ }
+ }
+ cfsReader.close();
+ assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
+
+ // Now verify file names:
+ String[] expected = {"_0.cfs",
+ "_0_1.del",
+ "_1.cfs",
+ "_2.cfs",
+ "_2_1.s" + contentFieldIndex,
+ "_3.cfs",
+ "segments_a",
+ "segments.gen"};
+ if (!autoCommit) {
+ expected[6] = "segments_3";
+ }
+
+ String[] actual = dir.list();
+ Arrays.sort(expected);
+ Arrays.sort(actual);
+ if (!Arrays.equals(expected, actual)) {
+ fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual));
+ }
+ dir.close();
+ } finally {
+ rmDir(outputDir);
+ }
}
- dir.close();
-
- rmDir(outputDir);
}
private String asString(String[] l) {
Index: src/java/org/apache/lucene/index/IndexDeletionPolicy.java
===================================================================
--- src/java/org/apache/lucene/index/IndexDeletionPolicy.java (revision 0)
+++ src/java/org/apache/lucene/index/IndexDeletionPolicy.java (revision 0)
@@ -0,0 +1,83 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+import java.io.IOException;
+
+/**
+ * <p>Expert: implement this interface, and pass it to one
+ * of the {@link IndexWriter} or {@link IndexReader}
+ * constructors, to customize when "point in time" commits
+ * are deleted from an index. The default deletion policy
+ * is {@link KeepOnlyLastCommitDeletionPolicy}, which always
+ * removes old commits as soon as a new commit is done (this
+ * matches the behavior before 2.2).</p>
+ *
+ * <p>One expected use case for this (and the reason why it
+ * was first created) is to work around problems with an
+ * index directory accessed via filesystems like NFS because
+ * NFS does not provide the "delete on last close" semantics
+ * that Lucene's "point in time" search normally relies on.
+ * By implementing a custom deletion policy, such as "a
+ * commit is only removed once it has been stale for more
+ * than X minutes", you can give your readers time to
+ * refresh to the new commit before {@link IndexWriter}
+ * removes the old commits. Note that doing so will
+ * increase the storage requirements of the index. See <a
+ * target="top"
+ * href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a>
+ * for details.</p>
+ */
+
+public interface IndexDeletionPolicy {
+
+ /**
+ * <p>This is called once when a writer is first
+ * instantiated to give the policy a chance to remove old
+ * commit points.</p>
+ *
+ * <p>The writer locates all commits present in the index
+ * and calls this method. The policy may choose to delete
+ * commit points. To delete a commit point, call the
+ * {@link IndexCommitPoint#delete} method.</p>
+ *
+ * @param commits List of {@link IndexCommitPoint},
+ * sorted by age (the 0th one is the oldest commit).
+ */
+ public void onInit(List commits) throws IOException;
+
+ /**
+ * <p>This is called each time the writer commits. This
+ * gives the policy a chance to remove old commit points
+ * with each commit.</p>
+ *
+   * <p>If the writer has <code>autoCommit = true</code> then
+ * this method will in general be called many times during
+ * one instance of {@link IndexWriter}. If
+ * <code>autoCommit = false</code> then this method is
+ * only called once when {@link IndexWriter#close} is
+   * called, or not at all if {@link IndexWriter#abort} is
+   * called.  The policy may then choose to delete old
+   * commit points by calling {@link IndexCommitPoint#delete}.</p>
+ *
+   * @param commits List of {@link IndexCommitPoint},
+ * sorted by age (the 0th one is the oldest commit).
+ */
+ public void onCommit(List commits) throws IOException;
+}
Property changes on: src/java/org/apache/lucene/index/IndexDeletionPolicy.java
___________________________________________________________________
Name: svn:eol-style
+ native
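As a concrete illustration of the javadoc above ("a commit is only removed once it has been stale for more than X minutes"), here is a rough sketch of an expiration-based policy. The class name, constructor arguments, and the use of Directory.fileModified() to estimate a commit's age are assumptions made for illustration; they are not part of this patch:

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.IndexCommitPoint;
    import org.apache.lucene.index.IndexDeletionPolicy;
    import org.apache.lucene.store.Directory;

    public class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy {

      private final Directory dir;
      private final double expirationTimeSeconds;

      public ExpirationTimeDeletionPolicy(Directory dir, double expirationTimeSeconds) {
        this.dir = dir;
        this.expirationTimeSeconds = expirationTimeSeconds;
      }

      public void onInit(List commits) throws IOException {
        onCommit(commits);
      }

      public void onCommit(List commits) throws IOException {
        // Never delete the most recent commit; older commits are removed
        // only once they have been stale longer than expirationTimeSeconds:
        for (int i = 0; i < commits.size() - 1; i++) {
          IndexCommitPoint commit = (IndexCommitPoint) commits.get(i);
          double ageSec = (System.currentTimeMillis()
                           - dir.fileModified(commit.getSegmentsFileName())) / 1000.0;
          if (ageSec > expirationTimeSeconds) {
            commit.delete();
          }
        }
      }
    }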
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiReader.java (revision 515500)
+++ src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -220,13 +220,6 @@
return new MultiTermPositions(subReaders, starts);
}
- protected void setDeleter(IndexFileDeleter deleter) {
- // Share deleter to our SegmentReaders:
- this.deleter = deleter;
- for (int i = 0; i < subReaders.length; i++)
- subReaders[i].setDeleter(deleter);
- }
-
protected void doCommit() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 515500)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -114,7 +114,7 @@
private Directory directory;
private boolean directoryOwner;
private boolean closeDirectory;
- protected IndexFileDeleter deleter;
+ private IndexDeletionPolicy deletionPolicy;
private boolean isClosed;
private SegmentInfos segmentInfos;
@@ -131,30 +131,44 @@
path.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
- */
+ * @param path the path to the index directory */
public static IndexReader open(String path) throws CorruptIndexException, IOException {
- return open(FSDirectory.getDirectory(path), true);
+ return open(FSDirectory.getDirectory(path), true, null);
}
/** Returns an IndexReader reading the index in an FSDirectory in the named
- path.
+ * path.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
- */
+ * @param path the path to the index directory */
public static IndexReader open(File path) throws CorruptIndexException, IOException {
- return open(FSDirectory.getDirectory(path), true);
+ return open(FSDirectory.getDirectory(path), true, null);
}
/** Returns an IndexReader reading the index in the given Directory.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
+ * @param directory the index directory
*/
public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
- return open(directory, false);
+ return open(directory, false, null);
}
- private static IndexReader open(final Directory directory, final boolean closeDirectory) throws CorruptIndexException, IOException {
+ /** Expert: returns an IndexReader reading the index in the given
+ * Directory, with a custom {@link IndexDeletionPolicy}.
+ * @param directory the index directory
+ * @param deletionPolicy a custom deletion policy (only used
+ * if you use this reader to perform deletes or to set
+ * norms); see {@link IndexWriter} for details.
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
+ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
+ return open(directory, false, deletionPolicy);
+ }
+ private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
+
return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) {
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
@@ -162,8 +176,10 @@
SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
+ IndexReader reader;
+
if (infos.size() == 1) { // index is optimized
- return SegmentReader.get(infos, infos.info(0), closeDirectory);
+ reader = SegmentReader.get(infos, infos.info(0), closeDirectory);
} else {
// To reduce the chance of hitting FileNotFound
@@ -184,8 +200,10 @@
}
}
- return new MultiReader(directory, infos, closeDirectory, readers);
+ reader = new MultiReader(directory, infos, closeDirectory, readers);
}
+ reader.deletionPolicy = deletionPolicy;
+ return reader;
}
}.run();
}
@@ -715,20 +733,14 @@
*/
protected final synchronized void commit() throws IOException {
if(hasChanges){
- if (deleter == null) {
- // In the MultiReader case, we share this deleter
- // across all SegmentReaders:
- setDeleter(new IndexFileDeleter(segmentInfos, directory));
- }
if(directoryOwner){
- // Should not be necessary: no prior commit should
- // have left pending files, so just defensive:
- deleter.clearPendingFiles();
+ // Default deleter (for backwards compatibility) is
+        // KeepOnlyLastCommitDeletionPolicy:
+ IndexFileDeleter deleter = new IndexFileDeleter(directory,
+ deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
+ segmentInfos, null);
- String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
- String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
-
// Checkpoint the state we are about to change, in
// case we have to roll back:
startCommit();
@@ -749,24 +761,16 @@
// actually in the index):
rollbackCommit();
- // Erase any pending files that we were going to delete:
- deleter.clearPendingFiles();
-
- // Remove possibly partially written next
- // segments file:
- deleter.deleteFile(nextSegmentsFileName);
-
// Recompute deletable files & remove them (so
// partially written .del files, etc, are
// removed):
- deleter.findDeletableFiles();
- deleter.deleteFiles();
+ deleter.refresh();
}
}
- // Attempt to delete all files we just obsoleted:
- deleter.deleteFile(oldInfoFileName);
- deleter.commitPendingFiles();
+ // Have the deleter remove any now unreferenced
+ // files due to this commit:
+ deleter.checkpoint(segmentInfos, true);
if (writeLock != null) {
writeLock.release(); // release write lock
@@ -779,13 +783,6 @@
hasChanges = false;
}
- protected void setDeleter(IndexFileDeleter deleter) {
- this.deleter = deleter;
- }
- protected IndexFileDeleter getDeleter() {
- return deleter;
- }
-
/** Implements commit. */
protected abstract void doCommit() throws IOException;
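Before moving on to the new interfaces, a hypothetical usage sketch of the expert open() overload added above; dir, the chosen policy, and the term/field values are placeholders, not part of this patch:

    // Deletes and norm changes made through this reader are committed when the
    // reader is closed; at that point the supplied policy decides which older
    // commits (segments_N files) may be removed.
    IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy();
    IndexReader reader = IndexReader.open(dir, policy);
    reader.deleteDocuments(new Term("id", "42"));   // placeholder term
    reader.setNorm(0, "contents", 1.5f);            // placeholder doc/field
    reader.close();                                 // the commit happens here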
Index: src/java/org/apache/lucene/index/IndexCommitPoint.java
===================================================================
--- src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0)
+++ src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0)
@@ -0,0 +1,41 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Represents a single commit into an index as seen by the
+ * {@link IndexDeletionPolicy}.
+ */
+
+public interface IndexCommitPoint {
+
+ /**
+   * Get the segments file (i.e., <code>segments_N</code>) of
+ * this commit point.
+ */
+ public String getSegmentsFileName();
+
+ /**
+ * Notify the writer that this commit point should be
+ * deleted. This should only be called by the {@link
+ * IndexDeletionPolicy} during its {@link
+ * IndexDeletionPolicy#onInit} or {@link
+ * IndexDeletionPolicy#onCommit} method.
+ */
+ public void delete();
+}
Property changes on: src/java/org/apache/lucene/index/IndexCommitPoint.java
___________________________________________________________________
Name: svn:eol-style
+ native
Index: src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNames.java (revision 515500)
+++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy)
@@ -37,7 +37,19 @@
/** Extension of norms file */
static final String NORMS_EXTENSION = "nrm";
-
+
+ /** Extension of compound file */
+ static final String COMPOUND_FILE_EXTENSION = "cfs";
+
+ /** Extension of deletes */
+ static final String DELETES_EXTENSION = "del";
+
+ /** Extension of single norms */
+ static final String SINGLE_NORMS_EXTENSION = "f";
+
+ /** Extension of separate norms */
+ static final String SEPARATE_NORMS_EXTENSION = "s";
+
/**
* This array contains all filename extensions used by
* Lucene's index files, with two exceptions, namely the
@@ -50,6 +62,13 @@
"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
"tvx", "tvd", "tvf", "gen", "nrm"
};
+
+ /** File extensions that are added to a compound file
+ * (same as above, minus "del", "gen", "cfs"). */
+ static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] {
+ "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx",
+ "tvx", "tvd", "tvf", "nrm"
+ };
/** File extensions of old-style index files */
static final String COMPOUND_EXTENSIONS[] = new String[] {
Index: src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfos.java (revision 515500)
+++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy)
@@ -88,16 +88,9 @@
for (int i = 0; i < files.length; i++) {
String file = files[i];
if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
- if (file.equals(IndexFileNames.SEGMENTS)) {
- // Pre lock-less commits:
- if (max == -1) {
- max = 0;
- }
- } else {
- long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX);
- if (v > max) {
- max = v;
- }
+ long gen = generationFromSegmentsFileName(file);
+ if (gen > max) {
+ max = gen;
}
}
}
@@ -152,6 +145,22 @@
}
/**
+ * Parse the generation off the segments file name and
+ * return it.
+ */
+ public static long generationFromSegmentsFileName(String fileName) {
+ if (fileName.equals(IndexFileNames.SEGMENTS)) {
+ return 0;
+ } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+ return Long.parseLong(fileName.substring(1+IndexFileNames.SEGMENTS.length()),
+ Character.MAX_RADIX);
+ } else {
+ throw new IllegalArgumentException("fileName \"" + fileName + "\" is not a segments file");
+ }
+ }
+
+
+ /**
* Get the next segments_N filename that will be written.
*/
public String getNextSegmentFileName() {
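For illustration, the new helper interprets the suffix after "segments_" as a base-36 (Character.MAX_RADIX) number, so from code inside org.apache.lucene.index one would see, roughly:

    long g0 = SegmentInfos.generationFromSegmentsFileName("segments");    // 0 (pre-lockless format)
    long g1 = SegmentInfos.generationFromSegmentsFileName("segments_1");  // 1
    long gA = SegmentInfos.generationFromSegmentsFileName("segments_a");  // 10 ('a' in radix 36)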
@@ -181,12 +190,8 @@
IndexInput input = directory.openInput(segmentFileName);
- if (segmentFileName.equals(IndexFileNames.SEGMENTS)) {
- generation = 0;
- } else {
- generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()),
- Character.MAX_RADIX);
- }
+ generation = generationFromSegmentsFileName(segmentFileName);
+
lastGeneration = generation;
try {
@@ -255,6 +260,8 @@
IndexOutput output = directory.createOutput(segmentFileName);
+ boolean success = false;
+
try {
output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
output.writeLong(++version); // every write changes
@@ -266,7 +273,16 @@
}
}
finally {
- output.close();
+ try {
+ output.close();
+ success = true;
+ } finally {
+ if (!success) {
+ // Try not to leave a truncated segments_N file in
+ // the index:
+ directory.deleteFile(segmentFileName);
+ }
+ }
}
try {
@@ -305,6 +321,9 @@
public long getVersion() {
return version;
}
+ public long getGeneration() {
+ return generation;
+ }
/**
* Current version number from segments file.
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 515500)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -29,48 +29,100 @@
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.Vector;
-import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.List;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
/**
- An IndexWriter creates and maintains an index.
+ An <code>IndexWriter</code> creates and maintains an index.
- <p>The third argument (<code>create</code>) to the
+ <p>The <code>create</code> argument to the
<a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
determines whether a new index is created, or whether an existing index is
- opened for the addition of new documents. Note that you
- can open an index with create=true even while readers are
+ opened. Note that you
+ can open an index with <code>create=true</code> even while readers are
using the index. The old readers will continue to search
the "point in time" snapshot they had opened, and won't
- see the newly created index until they re-open.</p>
+ see the newly created index until they re-open. There are
+ also <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
+ with no <code>create</code> argument which
+ will create a new index if there is not already an index at the
+ provided path and otherwise open the existing index.</p>
- <p>In either case, documents are added with the <a
- href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method.
- When finished adding documents, <a href="#close()"><b>close</b></a> should be called.</p>
+ <p>In either case, documents are added with <a
+ href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a>
+ and removed with <a
+ href="#deleteDocuments(org.apache.lucene.index.Term)"><b>deleteDocuments</b></a>.
+ A document can be updated with <a href="#updateDocument(org.apache.lucene.index.Term, org.apache.lucene.document.Document)"><b>updateDocument</b></a>
+ (which just deletes and then adds). When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
+ <p>These changes are buffered in memory and periodically
+ flushed to the {@link Directory} (during the above method calls). A flush is triggered when there are
+ enough buffered deletes (see {@link
+ #setMaxBufferedDeleteTerms}) or enough added documents
+ (see {@link #setMaxBufferedDocs}) since the last flush,
+ whichever is sooner. When a flush occurs, both pending
+ deletes and added documents are flushed to the index. A
+ flush may also trigger one or more segment merges.</p>
+
+ <a name="autoCommit"></a>
+ <p>The optional <code>autoCommit</code> argument to the
+ <a href="#IndexWriter(org.apache.lucene.store.Directory, boolean, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
+ controls visibility of the changes to {@link IndexReader} instances reading the same index.
+ When this is <code>false</code>, changes are not
+ visible until {@link #close()} is called.
+ Note that changes will still be flushed to the
+ {@link org.apache.lucene.store.Directory} as new files,
+ but are not committed (no new <code>segments_N</code> file
+ is written referencing the new files) until {@link #close} is
+ called. If something goes terribly wrong (for example the
+ JVM crashes) before {@link #close()}, then
+ the index will reflect none of the changes made (it will
+ remain in its starting state).
+ You can also call {@link #abort()}, which closes the writer without committing any
+ changes, and removes any index
+ files that had been flushed but are now unreferenced.
+ This mode is useful for preventing readers from refreshing
+ at a bad time (for example after you've done all your
+ deletes but before you've done your adds).
+ It can also be used to implement simple single-writer
+ transactional semantics ("all or none").</p>
+
+ <p>When <code>autoCommit</code> is <code>true</code> then
+ every flush is also a commit ({@link IndexReader}
+ instances will see each flush as changes to the index).
+ This is the default, to match the behavior before 2.2.
+ When running in this mode, be careful not to refresh your
+ readers while optimize or segment merges are taking place
+ as this can tie up substantial disk space.</p>
+
<p>If an index will not have more documents added for a while and optimal search
performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
method should be called before the index is closed.</p>
-
- <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open
- another IndexWriter on the same directory will lead to a
+
+ <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
+ another <code>IndexWriter</code> on the same directory will lead to a
{@link LockObtainFailedException}. The {@link LockObtainFailedException}
is also thrown if an IndexReader on the same directory is used to delete documents
from the index.</p>
- <p>As of <b>2.1</b>, IndexWriter can now delete documents
- by {@link Term} (see {@link #deleteDocuments} ) and update
- (delete then add) documents (see {@link #updateDocument}).
- Deletes are buffered until {@link
- #setMaxBufferedDeleteTerms} <code>Terms</code> at which
- point they are flushed to the index. Note that a flush
- occurs when there are enough buffered deletes or enough
- added documents, whichever is sooner. When a flush
- occurs, both pending deletes and added documents are
- flushed to the index.</p>
+ <a name="deletionPolicy"></a>
+ <p>Expert: <code>IndexWriter</code> allows an optional
+ {@link IndexDeletionPolicy} implementation to be
+ specified. You can use this to control when prior commits
+ are deleted from the index. The default policy is {@link
+ KeepOnlyLastCommitDeletionPolicy} which removes all prior
+ commits as soon as a new commit is done (this matches
+ behavior before 2.2). Creating your own policy can allow
+ you to explicitly keep previous "point in time" commits
+ alive in the index for some time, to allow readers to
+ refresh to the new commit without having the old commit
+ deleted out from under them. This is necessary on
+ filesystems like NFS that do not support "delete on last
+ close" semantics, which Lucene's "point in time" search
+ normally relies on. </p>
*/
public class IndexWriter {
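A short sketch of the autoCommit=false, "all or none" pattern described in the javadoc above, using the IndexWriter(Directory, boolean, Analyzer) constructor added by this patch; dir, analyzer, and the document/term values are placeholders:

    boolean success = false;
    IndexWriter writer = new IndexWriter(dir, false, analyzer);  // autoCommit=false
    try {
      writer.deleteDocuments(new Term("id", "stale"));  // placeholder delete
      writer.addDocument(doc);                          // placeholder add
      // nothing is visible to readers yet...
      writer.close();   // the single commit: readers now see all changes at once
      success = true;
    } finally {
      if (!success) {
        writer.abort(); // discard everything flushed since the writer was opened
      }
    }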
@@ -83,6 +135,9 @@
private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
+ /**
+ * Name of the write lock in the index.
+ */
public static final String WRITE_LOCK_NAME = "write.lock";
/**
@@ -120,11 +175,13 @@
private Similarity similarity = Similarity.getDefault(); // how to normalize
- private boolean inTransaction = false; // true iff we are in a transaction
private boolean commitPending; // true if segmentInfos has changes not yet committed
- private HashSet protectedSegments; // segment names that should not be deleted until commit
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private boolean localAutoCommit; // saved autoCommit during local transaction
+ private boolean autoCommit = true; // false if we should commit only on close
+
SegmentInfos segmentInfos = new SegmentInfos(); // the segments
SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
@@ -238,7 +295,7 @@
*/
public IndexWriter(String path, Analyzer a, boolean create)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(path, a, create);
+ init(FSDirectory.getDirectory(path), a, create, true, null, true);
}
/**
@@ -263,7 +320,7 @@
*/
public IndexWriter(File path, Analyzer a, boolean create)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(path, a, create);
+ init(FSDirectory.getDirectory(path), a, create, true, null, true);
}
/**
@@ -288,14 +345,14 @@
*/
public IndexWriter(Directory d, Analyzer a, boolean create)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, create, false);
+ init(d, a, create, false, null, true);
}
/**
* Constructs an IndexWriter for the index in
- * <code>path</code>, creating it first if it does not
- * already exist, otherwise appending to the existing
- * index. Text will be analyzed with <code>a</code>.
+ * <code>path</code>, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * <code>a</code>.
*
* @param path the path to the index directory
* @param a the analyzer to use
@@ -309,18 +366,13 @@
*/
public IndexWriter(String path, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(path)) {
- init(path, a, false);
- } else {
- init(path, a, true);
- }
+ init(FSDirectory.getDirectory(path), a, true, null, true);
}
/**
* Constructs an IndexWriter for the index in
- * <code>path</code>, creating it first if it does not
- * already exist, otherwise appending to the existing
- * index. Text will be analyzed with
+ * <code>path</code>, first creating it if it does not
+ * already exist. Text will be analyzed with
* <code>a</code>.
*
* @param path the path to the index directory
@@ -335,18 +387,14 @@
*/
public IndexWriter(File path, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(path)) {
- init(path, a, false);
- } else {
- init(path, a, true);
- }
+ init(FSDirectory.getDirectory(path), a, true, null, true);
}
/**
* Constructs an IndexWriter for the index in
- * <code>d</code>, creating it first if it does not
- * already exist, otherwise appending to the existing
- * index. Text will be analyzed with <code>a</code>.
+ * <code>d</code>, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * <code>a</code>.
*
* @param d the index directory
* @param a the analyzer to use
@@ -360,28 +408,124 @@
*/
public IndexWriter(Directory d, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(d)) {
- init(d, a, false, false);
- } else {
- init(d, a, true, false);
- }
+ init(d, a, false, null, true);
}
- private void init(String path, Analyzer a, final boolean create)
+ /**
+ * Constructs an IndexWriter for the index in
+ * <code>d</code>, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * <code>a</code>.
+ *
+ * @param d the index directory
+ * @param autoCommit see <a href="#autoCommit">above</a>
+ * @param a the analyzer to use
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(FSDirectory.getDirectory(path), a, create, true);
+ init(d, a, false, null, autoCommit);
}
- private void init(File path, Analyzer a, final boolean create)
+ /**
+ * Constructs an IndexWriter for the index in <code>d</code>.
+ * Text will be analyzed with <code>a</code>. If <code>create</code>
+ * is true, then a new, empty index will be created in
+ * <code>d</code>, replacing the index already there, if any.
+ *
+ * @param d the index directory
+ * @param autoCommit see <a href="#autoCommit">above</a>
+ * @param a the analyzer to use
+ * @param create <code>true</code> to create the index or overwrite
+ * the existing one; <code>false</code> to append to the existing
+ * index
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and <code>create</code> is
+ * <code>false</code> or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ init(d, a, create, false, null, autoCommit);
+ }
+
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in <code>d</code>,
+ * first creating it if it does not already exist. Text
+ * will be analyzed with <code>a</code>.
+ *
+ * @param d the index directory
+ * @param autoCommit see <a href="#autoCommit">above</a>
+ * @param a the analyzer to use
+ * @param deletionPolicy see <a href="#deletionPolicy">above</a>
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(FSDirectory.getDirectory(path), a, create, true);
+ init(d, a, false, deletionPolicy, autoCommit);
}
- private void init(Directory d, Analyzer a, final boolean create, boolean closeDir)
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in <code>d</code>.
+ * Text will be analyzed with <code>a</code>. If
+ * <code>create</code> is true, then a new, empty index
+ * will be created in <code>d</code>, replacing the index
+ * already there, if any.
+ *
+ * @param d the index directory
+ * @param autoCommit see <a href="#autoCommit">above</a>
+ * @param a the analyzer to use
+ * @param create <code>true</code> to create the index or overwrite
+ * the existing one; <code>false</code> to append to the existing
+ * index
+ * @param deletionPolicy see <a href="#deletionPolicy">above</a>
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (<code>write.lock</code> could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and <code>create</code> is
+ * <code>false</code> or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ init(d, a, create, false, deletionPolicy, autoCommit);
+ }
+
+ private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit)
throws CorruptIndexException, LockObtainFailedException, IOException {
+ if (IndexReader.indexExists(d)) {
+ init(d, a, false, closeDir, deletionPolicy, autoCommit);
+ } else {
+ init(d, a, true, closeDir, deletionPolicy, autoCommit);
+ }
+ }
+
+ private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
this.closeDir = closeDir;
directory = d;
analyzer = a;
+ this.infoStream = defaultInfoStream;
if (create) {
// Clear the write lock in case it's leftover:
@@ -410,13 +554,17 @@
segmentInfos.read(directory);
}
- // Create a deleter to keep track of which files can
- // be deleted:
- deleter = new IndexFileDeleter(segmentInfos, directory);
- deleter.setInfoStream(infoStream);
- deleter.findDeletableFiles();
- deleter.deleteFiles();
+ this.autoCommit = autoCommit;
+ if (!autoCommit) {
+ rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
+ }
+ // Default deleter (for backwards compatibility) is
+      // KeepOnlyLastCommitDeletionPolicy:
+ deleter = new IndexFileDeleter(directory,
+ deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
+ segmentInfos, infoStream);
+
} catch (IOException e) {
this.writeLock.release();
this.writeLock = null;
@@ -533,11 +681,28 @@
return mergeFactor;
}
- /** If non-null, information about merges and a message when
- * maxFieldLength is reached will be printed to this.
+ /** If non-null, this will be the default infoStream used
+ * by a newly instantiated IndexWriter.
+ * @see #setInfoStream
*/
+ public static void setDefaultInfoStream(PrintStream infoStream) {
+ IndexWriter.defaultInfoStream = infoStream;
+ }
+
+ /**
+ * @see #setDefaultInfoStream
+ */
+ public static PrintStream getDefaultInfoStream() {
+ return IndexWriter.defaultInfoStream;
+ }
+
+ /** If non-null, information about merges, deletes and a
+ * message when maxFieldLength is reached will be printed
+ * to this.
+ */
public void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
+ deleter.setInfoStream(infoStream);
}
/**
@@ -613,6 +778,14 @@
*/
public synchronized void close() throws CorruptIndexException, IOException {
flushRamSegments();
+
+ if (commitPending) {
+ segmentInfos.write(directory); // now commit changes
+ deleter.checkpoint(segmentInfos, true);
+ commitPending = false;
+ rollbackSegmentInfos = null;
+ }
+
ramDirectory.close();
if (writeLock != null) {
writeLock.release(); // release write lock
@@ -737,7 +910,9 @@
dw.setInfoStream(infoStream);
String segmentName = newRamSegmentName();
dw.addDocument(segmentName, doc);
- return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
+ SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false);
+ si.setNumFields(dw.getNumFields());
+ return si;
}
/**
@@ -871,6 +1046,7 @@
*/
private PrintStream infoStream = null;
+ private static PrintStream defaultInfoStream = null;
/** Merges all segments together into a single segment,
* optimizing an index for search.
@@ -949,21 +1125,18 @@
* merges that happen (or ram segments flushed) will not
* write a new segments file and will not remove any files
* that were present at the start of the transaction. You
- * must make a matched (try/finall) call to
+ * must make a matched (try/finally) call to
* commitTransaction() or rollbackTransaction() to finish
* the transaction.
*/
private void startTransaction() throws IOException {
- if (inTransaction) {
- throw new IOException("transaction is already in process");
+ localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
+ localAutoCommit = autoCommit;
+ if (localAutoCommit) {
+ flushRamSegments();
+ // Turn off auto-commit during our local transaction:
+ autoCommit = false;
}
- rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
- protectedSegments = new HashSet();
- for(int i=0;i<segmentInfos.size();i++) {
- SegmentInfo si = (SegmentInfo) segmentInfos.elementAt(i);
- protectedSegments.add(si.name);
- }
- inTransaction = true;
}
/*
@@ -972,20 +1145,21 @@
*/
private void rollbackTransaction() throws IOException {
+ // First restore autoCommit in case we hit an exception below:
+ autoCommit = localAutoCommit;
+
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write once").
segmentInfos.clear();
- segmentInfos.addAll(rollbackSegmentInfos);
+ segmentInfos.addAll(localRollbackSegmentInfos);
+ localRollbackSegmentInfos = null;
- // Ask deleter to locate unreferenced files & remove
- // them:
- deleter.clearPendingFiles();
- deleter.findDeletableFiles();
- deleter.deleteFiles();
-
- clearTransaction();
+ // Ask deleter to locate unreferenced files we had
+ // created & remove them:
+ deleter.checkpoint(segmentInfos, false);
+ deleter.refresh();
}
/*
@@ -994,35 +1168,79 @@
* accumulated during the transaction
*/
private void commitTransaction() throws IOException {
- if (commitPending) {
- boolean success = false;
- try {
- // If we hit eg disk full during this write we have
- // to rollback.:
- segmentInfos.write(directory); // commit changes
- success = true;
- } finally {
- if (!success) {
- rollbackTransaction();
- }
+
+ // First restore autoCommit in case we hit an exception below:
+ autoCommit = localAutoCommit;
+
+ boolean success = false;
+ try {
+ checkpoint();
+ success = true;
+ } finally {
+ if (!success) {
+ rollbackTransaction();
}
- deleter.commitPendingFiles();
- commitPending = false;
}
+ localRollbackSegmentInfos = null;
- clearTransaction();
+ // Give deleter a chance to remove files now:
+ deleter.checkpoint(segmentInfos, autoCommit);
}
- /* Should only be called by rollbackTransaction &
- * commitTransaction */
- private void clearTransaction() {
- protectedSegments = null;
- rollbackSegmentInfos = null;
- inTransaction = false;
- }
+ /**
+ * Close the <code>IndexWriter</code> without committing
+ * any of the changes that have occurred since it was
+ * opened. This removes any temporary files that had been
+ * created, after which the state of the index will be the
+ * same as it was when this writer was first opened. This
+ * can only be called when this IndexWriter was opened
+ * with <code>autoCommit=false</code>.
+ * @throws IllegalStateException if this is called when
+ * the writer was opened with <code>autoCommit=true</code>.
+ * @throws IOException if there is a low-level IO error
+ */
+ public void abort() throws IOException {
+ if (!autoCommit) {
+ // Keep the same segmentInfos instance but replace all
+ // of its SegmentInfo instances. This is so the next
+ // attempt to commit using this instance of IndexWriter
+ // will always write to a new generation ("write once").
+ segmentInfos.clear();
+ segmentInfos.addAll(rollbackSegmentInfos);
+ // Ask deleter to locate unreferenced files & remove
+ // them:
+ deleter.checkpoint(segmentInfos, false);
+ deleter.refresh();
+ ramSegmentInfos = new SegmentInfos();
+ bufferedDeleteTerms.clear();
+ numBufferedDeleteTerms = 0;
+
+ commitPending = false;
+ close();
+
+ } else {
+ throw new IllegalStateException("abort() can only be called when IndexWriter was opened with autoCommit=false");
+ }
+ }
+
+ /*
+ * Called whenever the SegmentInfos has been updated and
+ * the index files referenced exist (correctly) in the
+ * index directory. If we are in autoCommit mode, we
+ * commit the change immediately. Else, we mark
+ * commitPending.
+ */
+ private void checkpoint() throws IOException {
+ if (autoCommit) {
+ segmentInfos.write(directory);
+ } else {
+ commitPending = true;
+ }
+ }
+
/** Merges all segments from an array of indexes into this index.
*
* <p>This may be used to parallelize batch indexing. A large document
@@ -1266,16 +1484,13 @@
final String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(this, mergedName);
- final Vector segmentsToDelete = new Vector();
SegmentInfo info;
- String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
IndexReader sReader = null;
try {
if (segmentInfos.size() == 1){ // add existing index, if any
sReader = SegmentReader.get(segmentInfos.info(0));
merger.add(sReader);
- segmentsToDelete.addElement(sReader); // queue segment for deletion
}
for (int i = 0; i < readers.length; i++) // add new indexes
@@ -1288,16 +1503,15 @@
try {
int docCount = merger.merge(); // merge 'em
- segmentInfos.setSize(0); // pop old infos & add new
- info = new SegmentInfo(mergedName, docCount, directory, false, true);
- segmentInfos.addElement(info);
- commitPending = true;
-
if(sReader != null) {
sReader.close();
sReader = null;
}
+ segmentInfos.setSize(0); // pop old infos & add new
+ info = new SegmentInfo(mergedName, docCount, directory, false, true);
+ segmentInfos.addElement(info);
+
success = true;
} finally {
@@ -1312,26 +1526,16 @@
sReader.close();
}
}
+
+ if (useCompoundFile) {
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
-
- if (useCompoundFile) {
boolean success = false;
- segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
- Vector filesToDelete;
-
startTransaction();
try {
-
- filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
-
+ merger.createCompoundFile(mergedName + ".cfs");
info.setUseCompoundFile(true);
- commitPending = true;
- success = true;
-
} finally {
if (!success) {
rollbackTransaction();
@@ -1339,9 +1543,6 @@
commitTransaction();
}
}
-
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteFiles(filesToDelete); // delete now unused files of segment
}
}
@@ -1500,14 +1701,12 @@
final String mergedName = newSegmentName();
SegmentMerger merger = null;
- final Vector segmentsToDelete = new Vector();
+ final List ramSegmentsToDelete = new ArrayList();
- String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
- String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
-
SegmentInfo newSegment = null;
int mergedDocCount = 0;
+ boolean anyDeletes = (bufferedDeleteTerms.size() != 0);
// This is try/finally to make sure merger's readers are closed:
try {
@@ -1522,9 +1721,9 @@
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
merger.add(reader);
- if ((reader.directory() == this.directory) || // if we own the directory
- (reader.directory() == this.ramDirectory))
- segmentsToDelete.addElement(reader); // queue segment for deletion
+ if (reader.directory() == this.ramDirectory) {
+ ramSegmentsToDelete.add(si);
+ }
}
}
@@ -1545,9 +1744,8 @@
newSegment = new SegmentInfo(mergedName, mergedDocCount,
directory, false, true);
}
-
- if (!inTransaction
- && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) {
+
+ if (sourceSegments != ramSegmentInfos || anyDeletes) {
// Now save the SegmentInfo instances that
// we are replacing:
rollback = (SegmentInfos) segmentInfos.clone();
@@ -1565,19 +1763,12 @@
}
if (sourceSegments == ramSegmentInfos) {
- // Should not be necessary: no prior commit should
- // have left pending files, so just defensive:
- deleter.clearPendingFiles();
maybeApplyDeletes(doMerge);
doAfterFlush();
}
+
+ checkpoint();
- if (!inTransaction) {
- segmentInfos.write(directory); // commit before deleting
- } else {
- commitPending = true;
- }
-
success = true;
} finally {
@@ -1589,11 +1780,10 @@
if (sourceSegments == ramSegmentInfos) {
ramSegmentInfos.removeAllElements();
}
- } else if (!inTransaction) {
+ } else {
// Must rollback so our state matches index:
-
- if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) {
+ if (sourceSegments == ramSegmentInfos && !anyDeletes) {
// Simple case: newSegment may or may not have
// been added to the end of our segment infos,
// so just check & remove if so:
@@ -1611,14 +1801,8 @@
segmentInfos.addAll(rollback);
}
- // Erase any pending files that we were going to delete:
- // i.e. old del files added by SegmentReader.doCommit()
- deleter.clearPendingFiles();
-
- // Delete any partially created files:
- deleter.deleteFile(nextSegmentsFileName);
- deleter.findDeletableFiles();
- deleter.deleteFiles();
+ // Delete any partially created and now unreferenced files:
+ deleter.refresh();
}
}
} finally {
@@ -1626,53 +1810,33 @@
if (doMerge) merger.closeReaders();
}
- if (!inTransaction) {
- // Attempt to delete all files we just obsoleted:
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
- // Includes the old del files
- deleter.commitPendingFiles();
- } else {
- deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments
- }
+ // Delete the RAM segments
+ deleter.deleteDirect(ramDirectory, ramSegmentsToDelete);
+ // Give deleter a chance to remove files now.
+ deleter.checkpoint(segmentInfos, autoCommit);
+
if (useCompoundFile && doMerge) {
- segmentsInfosFileName = nextSegmentsFileName;
- nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
-
- Vector filesToDelete;
-
boolean success = false;
try {
- filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
+ merger.createCompoundFile(mergedName + ".cfs");
newSegment.setUseCompoundFile(true);
- if (!inTransaction) {
- segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
- }
+ checkpoint();
success = true;
} finally {
- if (!success && !inTransaction) {
+ if (!success) {
// Must rollback:
newSegment.setUseCompoundFile(false);
- deleter.deleteFile(mergedName + ".cfs");
- deleter.deleteFile(nextSegmentsFileName);
+ deleter.refresh();
}
}
-
- if (!inTransaction) {
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- }
-
- // We can delete these segments whether or not we are
- // in a transaction because we had just written them
- // above so they can't need protection by the
- // transaction:
- deleter.deleteFiles(filesToDelete); // delete now-unused segments
+
+ // Give deleter a chance to remove files now.
+ deleter.checkpoint(segmentInfos, autoCommit);
}
return mergedDocCount;
@@ -1692,7 +1856,6 @@
IndexReader reader = null;
try {
reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
- reader.setDeleter(deleter);
// Apply delete terms to the segment just flushed from ram
// apply appropriately so that a delete term is only applied to
@@ -1718,7 +1881,6 @@
IndexReader reader = null;
try {
reader = SegmentReader.get(segmentInfos.info(i));
- reader.setDeleter(deleter);
// Apply delete terms to disk segments
// except the one just flushed from ram.
@@ -1769,7 +1931,7 @@
}
// Number of ram segments a delete term applies to.
- private class Num {
+ private static class Num {
private int num;
Num(int num) {
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentWriter.java (revision 515500)
+++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy)
@@ -388,6 +388,9 @@
this.infoStream = infoStream;
}
+ int getNumFields() {
+ return fieldInfos.size();
+ }
}
final class Posting { // info about a Term in a doc
Index: src/java/org/apache/lucene/index/IndexFileDeleter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 515500)
+++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy)
@@ -18,284 +18,484 @@
*/
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFileNameFilter;
import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.Vector;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collections;
-/**
- * A utility class (used by both IndexReader and
- * IndexWriter) to keep track of files that need to be
- * deleted because they are no longer referenced by the
- * index.
+/*
+ * This class keeps track of each SegmentInfos instance that
+ * is still "live", either because it corresponds to a
+ * segments_N in the Directory (a real commit) or because
+ * it's the in-memory SegmentInfos that a writer is actively
+ * updating but has not yet committed (currently this only
+ * applies when autoCommit=false in IndexWriter). This
+ * class uses simple reference counting to map the live
+ * SegmentInfos instances to individual files in the
+ * Directory.
+ *
+ * A separate deletion policy interface
+ * (IndexDeletionPolicy) is consulted on creation (onInit)
+ * and once per commit (onCommit), to decide when a commit
+ * should be removed.
+ *
+ * The current default deletion policy is {@link
+ * KeepOnlyLastCommitDeletionPolicy}, which removes all
+ * prior commits when a new commit has completed. This
+ * matches the behavior before 2.2.
+ *
+ * Note that you must hold the write.lock before
+ * instantiating this class. It opens segments_N file(s)
+ * directly with no retry logic.
*/
+
final class IndexFileDeleter {
- private Vector deletable;
- private HashSet pending;
+
+ /* Files that we tried to delete but failed (likely
+ * because they are open and we are running on Windows),
+   * so we will retry them later: */
+ private List deletable;
+
+ /* Reference count for all files in the index. Maps
+ * String to RefCount (class below) instances: */
+ private HashMap refCounts = new HashMap();
+
+ /* Holds all commits (segments_N) currently in the index.
+ * This will have just 1 commit if you are using the
+ * default delete policy (KeepOnlyLastCommitDeletionPolicy).
+ * Other policies may leave commit points live for longer
+ * in which case this list would be longer than 1: */
+ private List commits = new ArrayList();
+
+ /* Holds files we had incref'd from the previous
+ * non-commit checkpoint: */
+ private List lastFiles = new ArrayList();
+
+ private PrintStream infoStream;
+ private List toDelete = new ArrayList();
private Directory directory;
- private SegmentInfos segmentInfos;
- private PrintStream infoStream;
+ private IndexDeletionPolicy policy;
- IndexFileDeleter(SegmentInfos segmentInfos, Directory directory)
- throws IOException {
- this.segmentInfos = segmentInfos;
- this.directory = directory;
- }
- void setSegmentInfos(SegmentInfos segmentInfos) {
- this.segmentInfos = segmentInfos;
- }
- SegmentInfos getSegmentInfos() {
- return segmentInfos;
- }
-
void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
}
+
+ private void message(String message) {
+ infoStream.println(this + " " + Thread.currentThread().getName() + ": " + message);
+ }
- /** Determine index files that are no longer referenced
- * and therefore should be deleted. This is called once
- * (by the writer), and then subsequently we add onto
- * deletable any files that are no longer needed at the
- * point that we create the unused file (eg when merging
- * segments), and we only remove from deletable when a
- * file is successfully deleted.
+ /**
+ * Initialize the deleter: find all previous commits in
+ * the Directory, incref the files they reference, call
+ * the policy to let it delete commits. The incoming
+ * segmentInfos must have been loaded from a commit point
+ * and not yet modified. This will remove any files not
+ * referenced by any of the commits.
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
+ public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream)
+ throws CorruptIndexException, IOException {
- void findDeletableFiles() throws IOException {
+ this.infoStream = infoStream;
+ this.policy = policy;
+ this.directory = directory;
- // Gather all "current" segments:
- HashMap current = new HashMap();
- for(int j=0;j<segmentInfos.size();j++) {
- SegmentInfo segmentInfo = (SegmentInfo) segmentInfos.elementAt(j);
- current.put(segmentInfo.name, segmentInfo);
- }
-
- // Then go through all files in the Directory that are
- // Lucene index files, and add to deletable if they are
- // not referenced by the current segments info:
-
- String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
+ // First pass: walk the files and initialize our ref
+ // counts:
+ long currentGen = segmentInfos.getGeneration();
IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
String[] files = directory.list();
- for (int i = 0; i < files.length; i++) {
+ CommitPoint currentCommitPoint = null;
- if (filter.accept(null, files[i]) && !files[i].equals(segmentsInfosFileName) && !files[i].equals(IndexFileNames.SEGMENTS_GEN)) {
+ for(int i=0;i<files.length;i++) {
- String segmentName;
- String extension;
+ String fileName = files[i];
- // First remove any extension:
- int loc = files[i].indexOf('.');
- if (loc != -1) {
- extension = files[i].substring(1+loc);
- segmentName = files[i].substring(0, loc);
- } else {
- extension = null;
- segmentName = files[i];
- }
+ if (filter.accept(null, fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
- // Then, remove any generation count:
- loc = segmentName.indexOf('_', 1);
- if (loc != -1) {
- segmentName = segmentName.substring(0, loc);
- }
+ // Add this file to refCounts with initial count 0:
+ getRefCount(fileName);
- // Delete this file if it's not a "current" segment,
- // or, it is a single index file but there is now a
- // corresponding compound file:
- boolean doDelete = false;
+ if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
- if (!current.containsKey(segmentName)) {
- // Delete if segment is not referenced:
- doDelete = true;
- } else {
- // OK, segment is referenced, but file may still
- // be orphan'd:
- SegmentInfo info = (SegmentInfo) current.get(segmentName);
-
- if (filter.isCFSFile(files[i]) && info.getUseCompoundFile()) {
- // This file is in fact stored in a CFS file for
- // this segment:
- doDelete = true;
- } else {
-
- if ("del".equals(extension)) {
- // This is a _segmentName_N.del file:
- if (!files[i].equals(info.getDelFileName())) {
- // If this is a seperate .del file, but it
- // doesn't match the current del filename for
- // this segment, then delete it:
- doDelete = true;
- }
- } else if (extension != null && extension.startsWith("s") && extension.matches("s\\d+")) {
- int field = Integer.parseInt(extension.substring(1));
- // This is a _segmentName_N.sX file:
- if (!files[i].equals(info.getNormFileName(field))) {
- // This is an orphan'd separate norms file:
- doDelete = true;
- }
- } else if ("cfs".equals(extension) && !info.getUseCompoundFile()) {
- // This is a partially written
- // _segmentName.cfs:
- doDelete = true;
+ // This is a commit (segments or segments_N), and
+ // it's valid (<= the max gen). Load it, then
+ // incref all files it refers to:
+ if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) {
+ if (infoStream != null) {
+ message("init: load commit \"" + fileName + "\"");
}
+ SegmentInfos sis = new SegmentInfos();
+ sis.read(directory, fileName);
+ CommitPoint commitPoint = new CommitPoint(sis);
+ if (sis.getGeneration() == segmentInfos.getGeneration()) {
+ currentCommitPoint = commitPoint;
+ }
+ commits.add(commitPoint);
+ incRef(sis, true);
}
}
+ }
+ }
- if (doDelete) {
- addDeletableFile(files[i]);
- if (infoStream != null) {
- infoStream.println("IndexFileDeleter: file \"" + files[i] + "\" is unreferenced in index and will be deleted on next commit");
- }
+ if (currentCommitPoint == null) {
+ throw new CorruptIndexException("failed to locate current segments_N file");
+ }
+
+ // We keep commits list in sorted order (oldest to newest):
+ Collections.sort(commits);
+
+ // Now delete anything with ref count at 0. These are
+ // presumably abandoned files, e.g. due to a crash of
+ // IndexWriter.
+ Iterator it = refCounts.keySet().iterator();
+ while(it.hasNext()) {
+ String fileName = (String) it.next();
+ RefCount rc = (RefCount) refCounts.get(fileName);
+ if (0 == rc.count) {
+ if (infoStream != null) {
+ message("init: removing unreferenced file \"" + fileName + "\"");
}
+ deleteFile(fileName);
}
}
+
+ // Finally, give policy a chance to remove things on
+ // startup:
+ policy.onInit(commits);
+
+ // It's OK for the onInit to remove the current commit
+ // point; we just have to checkpoint our in-memory
+ // SegmentInfos to protect those files that it uses:
+ if (currentCommitPoint.deleted) {
+ checkpoint(segmentInfos, false);
+ }
+
+ deleteCommits();
}
- /*
- * Some operating systems (e.g. Windows) don't permit a file to be deleted
- * while it is opened for read (e.g. by another process or thread). So we
- * assume that when a delete fails it is because the file is open in another
- * process, and queue the file for subsequent deletion.
+ /**
+ * Remove the CommitPoints in the toDelete List by
+ * DecRef'ing all files from each SegmentInfos.
*/
+ private void deleteCommits() throws IOException {
- void deleteSegments(Vector segments) throws IOException {
+ int size = toDelete.size();
- deleteFiles(); // try to delete files that we couldn't before
+ if (size > 0) {
- for (int i = 0; i < segments.size(); i++) {
- SegmentReader reader = (SegmentReader)segments.elementAt(i);
- if (reader.directory() == this.directory)
- deleteFiles(reader.files()); // try to delete our files
- else
- deleteFiles(reader.files(), reader.directory()); // delete other files
+ // First decref all files that had been referred to by
+ // the now-deleted commits:
+ for(int i=0;i<size;i++) {
+ CommitPoint commit = (CommitPoint) toDelete.get(i);
+ if (infoStream != null) {
+ message("deleteCommits: now remove commit \"" + commit.getSegmentsFileName() + "\"");
+ }
+ int size2 = commit.files.size();
+ for(int j=0;j<size2;j++) {
+ decRef((List) commit.files.get(j));
+ }
+ decRef(commit.getSegmentsFileName());
+ }
+ toDelete.clear();
+
+ // Now compact commits to remove deleted ones:
+ size = commits.size();
+ int readFrom = 0;
+ int writeTo = 0;
+ while(readFrom < size) {
+ CommitPoint commit = (CommitPoint) commits.get(readFrom);
+ if (!commit.deleted) {
+ if (writeTo != readFrom) {
+ commits.set(writeTo, commits.get(readFrom));
+ }
+ writeTo++;
+ }
+ readFrom++;
+ }
+
+ while(size > writeTo) {
+ commits.remove(size-1);
+ size--;
+ }
}
}
/**
- * Delete these segments, as long as they are not listed
- * in protectedSegments. If they are, then, instead, add
- * them to the pending set.
- */
-
- void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException {
+ * Writer calls this when it has hit an error and had to
+ * roll back, to tell us that there may now be
+ * unreferenced files in the filesystem. So we re-list
+ * the filesystem and delete such files:
+ */
+ public void refresh() throws IOException {
+ String[] files = directory.list();
+ IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
+ for(int i=0;i<files.length;i++) {
+ String fileName = files[i];
+ if (filter.accept(null, fileName) && !refCounts.containsKey(fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
+ // Unreferenced file, so remove it
+ if (infoStream != null) {
+ message("refresh: removing newly created unreferenced file \"" + fileName + "\"");
+ }
+ deleteFile(fileName);
+ }
+ }
+ }
- deleteFiles(); // try to delete files that we couldn't before
+ /**
+ * Writer calls this when it has made a "consistent
+ * change" to the index, meaning new files are written to
+ * the index and the in-memory SegmentInfos have been
+ * modified to point to those files.
+ *
+ * This may or may not be a commit (segments_N may or may
+ * not have been written).
+ *
+ * We simply incref the files referenced by the new
+ * SegmentInfos and decref the files we had previously
+ * seen (if any).
+ *
+ * If this is a commit, we also call the policy to give it
+ * a chance to remove other commits. If any commits are
+ * removed, we decref their files as well.
+ */
+ public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
- for (int i = 0; i < segments.size(); i++) {
- SegmentReader reader = (SegmentReader)segments.elementAt(i);
- if (reader.directory() == this.directory) {
- if (protectedSegments.contains(reader.getSegmentName())) {
- addPendingFiles(reader.files()); // record these for deletion on commit
- } else {
- deleteFiles(reader.files()); // try to delete our files
+ if (infoStream != null) {
+ message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [isCommit = " + isCommit + "]");
+ }
+
+ // Try again now to delete any previously un-deletable
+ // files (because they were in use, on Windows):
+ if (deletable != null) {
+ List oldDeletable = deletable;
+ deletable = null;
+ int size = oldDeletable.size();
+ for(int i=0;i<size;i++) {
+ deleteFile((String) oldDeletable.get(i));
+ }
+ }
+
+ // Incref the files:
+ incRef(segmentInfos, isCommit);
+
+ if (isCommit) {
+ // Append to our commits list:
+ commits.add(new CommitPoint(segmentInfos));
+
+ // Tell policy so it can remove commits:
+ policy.onCommit(commits);
+
+ // Decref files for commits that were deleted by the policy:
+ deleteCommits();
+ }
+
+ // DecRef old files from the last checkpoint, if any:
+ int size = lastFiles.size();
+ if (size > 0) {
+ for(int i=0;i<size;i++) {
+ decRef((List) lastFiles.get(i));
+ }
+ lastFiles.clear();
+ }
+
+ if (!isCommit) {
+ // Save files so we can decr on next checkpoint/commit:
+ size = segmentInfos.size();
+ for(int i=0;i<size;i++) {
+ SegmentInfo segmentInfo = segmentInfos.info(i);
+ if (segmentInfo.dir == directory) {
+ lastFiles.add(segmentInfo.files());
}
- } else {
- deleteFiles(reader.files(), reader.directory()); // delete other files
}
}
}
-
- void deleteFiles(Vector files, Directory directory)
- throws IOException {
- for (int i = 0; i < files.size(); i++)
- directory.deleteFile((String)files.elementAt(i));
+
+ private void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
+ int size = segmentInfos.size();
+ for(int i=0;i<size;i++) {
+ SegmentInfo segmentInfo = segmentInfos.info(i);
+ if (segmentInfo.dir == directory) {
+ incRef(segmentInfo.files());
+ }
+ }
+
+ if (isCommit) {
+ // Since this is a commit point, also incref its
+ // segments_N file:
+ getRefCount(segmentInfos.getCurrentSegmentFileName()).IncRef();
+ }
}
- void deleteFiles(Vector files)
- throws IOException {
- deleteFiles(); // try to delete files that we couldn't before
- for (int i = 0; i < files.size(); i++) {
- deleteFile((String) files.elementAt(i));
+ private void incRef(List files) throws IOException {
+ int size = files.size();
+ for(int i=0;i<size;i++) {
+ String fileName = (String) files.get(i);
+ RefCount rc = getRefCount(fileName);
+ if (infoStream != null) {
+ message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
+ }
+ rc.IncRef();
}
}
- void deleteFile(String file)
+ private void decRef(List files) throws IOException {
+ int size = files.size();
+ for(int i=0;i<size;i++) {
+ decRef((String) files.get(i));
+ }
+ }
+
+ private void decRef(String fileName) throws IOException {
+ RefCount rc = getRefCount(fileName);
+ if (infoStream != null) {
+ message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
+ }
+ if (0 == rc.DecRef()) {
+ // This file is no longer referenced by any past
+ // commit points nor by the in-memory SegmentInfos:
+ deleteFile(fileName);
+ refCounts.remove(fileName);
+ }
+ }
+
+ private RefCount getRefCount(String fileName) {
+ RefCount rc;
+ if (!refCounts.containsKey(fileName)) {
+ rc = new RefCount();
+ refCounts.put(fileName, rc);
+ } else {
+ rc = (RefCount) refCounts.get(fileName);
+ }
+ return rc;
+ }
+
+ private void deleteFile(String fileName)
throws IOException {
try {
- directory.deleteFile(file); // try to delete each file
+ if (infoStream != null) {
+ message("delete \"" + fileName + "\"");
+ }
+ directory.deleteFile(fileName);
} catch (IOException e) { // if delete fails
- if (directory.fileExists(file)) {
- if (infoStream != null)
- infoStream.println("IndexFileDeleter: unable to remove file \"" + file + "\": " + e.toString() + "; Will re-try later.");
- addDeletableFile(file); // add to deletable
+ if (directory.fileExists(fileName)) {
+
+ // Some operating systems (e.g. Windows) don't
+ // permit a file to be deleted while it is opened
+ // for read (e.g. by another process or thread). So
+ // we assume that when a delete fails it is because
+ // the file is open in another process, and queue
+ // the file for subsequent deletion.
+
+ if (infoStream != null) {
+ message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
+ }
+ if (deletable == null) {
+ deletable = new ArrayList();
+ }
+ deletable.add(fileName); // add to deletable
}
}
}
- void clearPendingFiles() {
- pending = null;
- }
-
- /*
- Record that the files for these segments should be
- deleted, once the pending deletes are committed.
+ /**
+ * Blindly delete the files used by the specific segments,
+ * with no reference counting and no retry. This is
+ * currently only used by the writer to delete its RAM segments
+ * from a RAMDirectory.
*/
- void addPendingSegments(Vector segments) throws IOException {
- for (int i = 0; i < segments.size(); i++) {
- SegmentReader reader = (SegmentReader)segments.elementAt(i);
- if (reader.directory() == this.directory) {
- addPendingFiles(reader.files());
+ public void deleteDirect(Directory otherDir, List segments) throws IOException {
+ int size = segments.size();
+ for(int i=0;i<size;i++) {
+ List toDelete = ((SegmentInfo) segments.get(i)).files();
+ int size2 = toDelete.size();
+ for(int j=0;j<size2;j++) {
+ otherDir.deleteFile((String) toDelete.get(j));
}
}
}
- /*
- Record list of files for deletion, but do not delete
- them until commitPendingFiles is called.
- */
- void addPendingFiles(Vector files) {
- for(int i=0;i<files.size();i++) {
- addPendingFile((String) files.elementAt(i));
+ /**
+ * Tracks the reference count for a single index file:
+ */
+ final private static class RefCount {
+
+ int count;
+
+ final private int IncRef() {
+ return ++count;
}
- }
- /*
- Record a file for deletion, but do not delete it until
- commitPendingFiles is called.
- */
- void addPendingFile(String fileName) {
- if (pending == null) {
- pending = new HashSet();
+ final private int DecRef() {
+ return --count;
}
- pending.add(fileName);
}
- void commitPendingFiles() throws IOException {
- if (pending != null) {
- if (deletable == null) {
- deletable = new Vector();
+ /**
+ * Holds details for each commit point. This class is
+ * also passed to the deletion policy. Note: this class
+ * has a natural ordering that is inconsistent with
+ * equals.
+ */
+
+ final private class CommitPoint implements Comparable, IndexCommitPoint {
+
+ long gen;
+ List files;
+ String segmentsFileName;
+ boolean deleted;
+
+ public CommitPoint(SegmentInfos segmentInfos) throws IOException {
+ segmentsFileName = segmentInfos.getCurrentSegmentFileName();
+ int size = segmentInfos.size();
+ files = new ArrayList(size);
+ gen = segmentInfos.getGeneration();
+ for(int i=0;i<size;i++) {
+ SegmentInfo segmentInfo = segmentInfos.info(i);
+ if (segmentInfo.dir == directory) {
+ files.add(segmentInfo.files());
+ }
}
- Iterator it = pending.iterator();
- while(it.hasNext()) {
- deletable.addElement(it.next());
- }
- pending = null;
- deleteFiles();
}
- }
- void addDeletableFile(String fileName) {
- if (deletable == null) {
- deletable = new Vector();
+ /**
+ * Get the segments_N file for this commit point.
+ */
+ public String getSegmentsFileName() {
+ return segmentsFileName;
}
- deletable.addElement(fileName);
- }
- void deleteFiles()
- throws IOException {
- if (deletable != null) {
- Vector oldDeletable = deletable;
- deletable = null;
- deleteFiles(oldDeletable); // try to delete deletable
+ /**
+ * Called only by the deletion policy, to remove this
+ * commit point from the index.
+ */
+ public void delete() {
+ if (!deleted) {
+ deleted = true;
+ toDelete.add(this);
+ }
}
+
+ public int compareTo(Object obj) {
+ CommitPoint commit = (CommitPoint) obj;
+ if (gen < commit.gen) {
+ return -1;
+ } else if (gen > commit.gen) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
}
}
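
The header comment on IndexFileDeleter above describes how an IndexDeletionPolicy is consulted on init and on each commit to decide which commit points to remove. As a hedged illustration (not part of this patch), the sketch below keeps the newest N commits instead of only the last one; the class name and constructor are hypothetical, and it relies only on the onInit/onCommit/delete() signatures that KeepOnlyLastCommitDeletionPolicy (later in this patch) uses.

package org.apache.lucene.index;

import java.util.List;

public class KeepLastNCommitsDeletionPolicy implements IndexDeletionPolicy {

  private int numToKeep;

  public KeepLastNCommitsDeletionPolicy(int numToKeep) {
    this.numToKeep = numToKeep;
  }

  public void onInit(List commits) {
    onCommit(commits);
  }

  public void onCommit(List commits) {
    // IndexFileDeleter keeps the commits list sorted oldest to
    // newest, so delete everything except the newest numToKeep:
    int size = commits.size();
    for(int i=0;i<size-numToKeep;i++) {
      ((IndexCommitPoint) commits.get(i)).delete();
    }
  }
}
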
Index: src/java/org/apache/lucene/index/SegmentInfo.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfo.java (revision 515500)
+++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy)
@@ -21,6 +21,8 @@
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
final class SegmentInfo {
public String name; // unique name in dir
@@ -50,6 +52,9 @@
// and true for newly created merged segments (both
// compound and non compound).
+ private List files; // cached list of files that this segment uses
+ // in the Directory
+
public SegmentInfo(String name, int docCount, Directory dir) {
this.name = name;
this.docCount = docCount;
@@ -71,6 +76,7 @@
* Copy everything from src SegmentInfo into our instance.
*/
void reset(SegmentInfo src) {
+ files = null;
name = src.name;
docCount = src.docCount;
dir = src.dir;
@@ -134,7 +140,7 @@
if (!preLockless) {
// This is a FORMAT_LOCKLESS segment, which means
- // there are no norms:
+ // there are no separate norms:
for(int i=0;i<numFields;i++) {
normGen[i] = -1;
}
@@ -174,10 +180,12 @@
} else {
delGen++;
}
+ files = null;
}
void clearDelGen() {
delGen = -1;
+ files = null;
}
public Object clone () {
@@ -199,7 +207,7 @@
return null;
} else {
// If delGen is 0, it's the pre-lockless-commit file format
- return IndexFileNames.fileNameFromGeneration(name, ".del", delGen);
+ return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@@ -280,6 +288,7 @@
} else {
normGen[fieldIndex]++;
}
+ files = null;
}
/**
@@ -326,6 +335,7 @@
} else {
this.isCompoundFile = -1;
}
+ files = null;
}
/**
@@ -338,7 +348,7 @@
} else if (isCompoundFile == 1) {
return true;
} else {
- return dir.fileExists(name + ".cfs");
+ return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
}
}
@@ -361,4 +371,87 @@
}
output.writeByte(isCompoundFile);
}
+
+ /*
+ * Return all files referenced by this SegmentInfo. The
+ * returned List is a locally cached List so you should not
+ * modify it.
+ */
+
+ public List files() throws IOException {
+
+ if (files != null) {
+ // Already cached:
+ return files;
+ }
+
+ files = new ArrayList();
+
+ boolean useCompoundFile = getUseCompoundFile();
+
+ if (useCompoundFile) {
+ files.add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ } else {
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) {
+ String ext = IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i];
+ String fileName = name + "." + ext;
+ if (dir.fileExists(fileName)) {
+ files.add(fileName);
+ }
+ }
+ }
+
+ String delFileName = IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
+ if (delFileName != null && (delGen > 0 || dir.fileExists(delFileName))) {
+ files.add(delFileName);
+ }
+
+ // Careful logic for norms files:
+ if (normGen != null) {
+ for(int i=0;i<normGen.length;i++) {
+ long gen = normGen[i];
+ if (gen > 0) {
+ // Definitely a separate norm file, with generation:
+ files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
+ } else if (-1 == gen) {
+ // No separate norms but maybe non-separate norms
+ // in the non compound file case:
+ if (!hasSingleNormFile && !useCompoundFile) {
+ String fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i;
+ if (dir.fileExists(fileName)) {
+ files.add(fileName);
+ }
+ }
+ } else if (0 == gen) {
+ // Pre-2.1: we have to check file existence
+ String fileName = null;
+ if (useCompoundFile) {
+ fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
+ } else if (!hasSingleNormFile) {
+ fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i;
+ }
+ if (fileName != null && dir.fileExists(fileName)) {
+ files.add(fileName);
+ }
+ }
+ }
+ } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) {
+ // Pre-2.1: we have to scan the dir to find all
+ // matching _X.sN/_X.fN files for our segment:
+ String prefix;
+ if (useCompoundFile)
+ prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
+ else
+ prefix = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION;
+ int prefixLength = prefix.length();
+ String[] allFiles = dir.list();
+ for(int i=0;i<allFiles.length;i++) {
+ String fileName = allFiles[i];
+ if (fileName.length() > prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) {
+ files.add(fileName);
+ }
+ }
+ }
+ return files;
+ }
}
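
Because SegmentInfo caches the result of files() and clears that cache on every mutation that changes the segment's file set, callers get a cheap, stable view of which files a segment owns. Below is a hedged sketch (not part of this patch) of inspecting that list: the class name SegmentFileLister is hypothetical, it is assumed to live in org.apache.lucene.index since SegmentInfo/SegmentInfos are package-private, and the example file names in the comment follow the naming implied by fileNameFromGeneration and the extension constants used above.

package org.apache.lucene.index;

import java.io.IOException;
import java.util.List;

class SegmentFileLister {
  // For a hypothetical segment "_7" stored as a compound file with
  // delGen=2 and normGen[0]=3, the printed list would be roughly:
  //   _7.cfs, _7_2.del, _7_3.s0
  static void dumpFiles(SegmentInfos infos) throws IOException {
    for (int i = 0; i < infos.size(); i++) {
      SegmentInfo si = infos.info(i);
      List files = si.files();   // cached internally; do not modify it
      for (int j = 0; j < files.size(); j++) {
        System.out.println(si.name + ": " + files.get(j));
      }
    }
  }
}
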
Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNameFilter.java (revision 515500)
+++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (working copy)
@@ -31,12 +31,17 @@
static IndexFileNameFilter singleton = new IndexFileNameFilter();
private HashSet extensions;
+ private HashSet extensionsInCFS;
public IndexFileNameFilter() {
extensions = new HashSet();
for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
extensions.add(IndexFileNames.INDEX_EXTENSIONS[i]);
}
+ extensionsInCFS = new HashSet();
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) {
+ extensionsInCFS.add(IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]);
+ }
}
/* (non-Javadoc)
@@ -72,10 +77,7 @@
int i = name.lastIndexOf('.');
if (i != -1) {
String extension = name.substring(1+i);
- if (extensions.contains(extension) &&
- !extension.equals("del") &&
- !extension.equals("gen") &&
- !extension.equals("cfs")) {
+ if (extensionsInCFS.contains(extension)) {
return true;
}
if (extension.startsWith("f") &&
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 515500)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -77,15 +77,6 @@
private void reWrite(SegmentInfo si) throws IOException {
// NOTE: norms are re-written in regular directory, not cfs
-
- String oldFileName = si.getNormFileName(this.number);
- if (oldFileName != null && !oldFileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) {
- // Mark this file for deletion. Note that we don't
- // actually try to delete it until the new segments files is
- // successfully written:
- deleter.addPendingFile(oldFileName);
- }
-
si.advanceNormGen(this.number);
IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
try {
@@ -227,14 +218,6 @@
protected void doCommit() throws IOException {
if (deletedDocsDirty) { // re-write deleted
- String oldDelFileName = si.getDelFileName();
- if (oldDelFileName != null) {
- // Mark this file for deletion. Note that we don't
- // actually try to delete it until the new segments files is
- // successfully written:
- deleter.addPendingFile(oldDelFileName);
- }
-
si.advanceDelGen();
// We can write directly to the actual name (vs to a
@@ -243,13 +226,6 @@
deletedDocs.write(directory(), si.getDelFileName());
}
if (undeleteAll && si.hasDeletions()) {
- String oldDelFileName = si.getDelFileName();
- if (oldDelFileName != null) {
- // Mark this file for deletion. Note that we don't
- // actually try to delete it until the new segments files is
- // successfully written:
- deleter.addPendingFile(oldDelFileName);
- }
si.clearDelGen();
}
if (normsDirty) { // re-write norms
@@ -320,37 +296,7 @@
}
Vector files() throws IOException {
- Vector files = new Vector(16);
-
- if (si.getUseCompoundFile()) {
- String name = segment + ".cfs";
- if (directory().fileExists(name)) {
- files.addElement(name);
- }
- } else {
- for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
- String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
- if (directory().fileExists(name))
- files.addElement(name);
- }
- }
-
- if (si.hasDeletions()) {
- files.addElement(si.getDelFileName());
- }
-
- boolean addedNrm = false;
- for (int i = 0; i < fieldInfos.size(); i++) {
- String name = si.getNormFileName(i);
- if (name != null && directory().fileExists(name)) {
- if (name.endsWith("." + IndexFileNames.NORMS_EXTENSION)) {
- if (addedNrm) continue; // add .nrm just once
- addedNrm = true;
- }
- files.addElement(name);
- }
- }
- return files;
+ return new Vector(si.files());
}
public TermEnum terms() {
Index: src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java
===================================================================
--- src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java (revision 0)
+++ src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java (revision 0)
@@ -0,0 +1,50 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+
+/**
+ * This {@link IndexDeletionPolicy} implementation
+ * keeps only the most recent commit and immediately removes
+ * all prior commits after a new commit is done. This is
+ * the default deletion policy.
+ */
+
+public final class KeepOnlyLastCommitDeletionPolicy implements IndexDeletionPolicy {
+
+ /**
+ * Deletes all commits except the most recent one.
+ */
+ public void onInit(List commits) {
+ // Note that commits.size() should normally be 1:
+ onCommit(commits);
+ }
+
+ /**
+ * Deletes all commits except the most recent one.
+ */
+ public void onCommit(List commits) {
+ // Note that commits.size() should normally be 2 (if not
+ // called by onInit above):
+ int size = commits.size();
+ for(int i=0;i<size-1;i++) {
+ ((IndexCommitPoint) commits.get(i)).delete();
+ }
+ }
+}
Property changes on: src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java
___________________________________________________________________
Name: svn:eol-style
+ native
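
KeepOnlyLastCommitDeletionPolicy above is the default, so most applications never name it explicitly; the hedged sketch below shows it being passed to an IndexWriter opened with autoCommit=false. The five-argument constructor (directory, autoCommit, analyzer, create, deletionPolicy) is an assumption about the constructor variants this patch adds, not something shown in this hunk.

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DeletionPolicyUsage {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    // autoCommit=false: readers see the changes only after close().
    IndexWriter writer = new IndexWriter(dir, false,
        new WhitespaceAnalyzer(), true,
        new KeepOnlyLastCommitDeletionPolicy());
    // ... addDocument / deleteDocuments calls go here ...
    writer.close();
  }
}
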
Index: src/site/src/documentation/content/xdocs/fileformats.xml
===================================================================
--- src/site/src/documentation/content/xdocs/fileformats.xml (revision 515500)
+++ src/site/src/documentation/content/xdocs/fileformats.xml (working copy)
@@ -771,7 +771,9 @@
generation is the active one (when older
segments_N files are present it's because they
temporarily cannot be deleted, or, a writer is in
- the process of committing). This file lists each
+ the process of committing, or a custom
+ <a href="http://lucene.apache.org/java/docs/api/org/apache/lucene/index/IndexDeletionPolicy.html">IndexDeletionPolicy</a>
+ is in use). This file lists each
segment by name, has details about the separate
norms and deletion files, and also contains the
size of each segment.
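
The documentation change above notes that several segments_N files can coexist and that the one with the largest generation is the active commit. Here is a hedged sketch (not part of this patch) that enumerates them the same way the first pass of the IndexFileDeleter constructor does; it assumes package access to SegmentInfos and IndexFileNames, and the listCommits helper is hypothetical.

static void listCommits(Directory dir) throws IOException {
  String[] files = dir.list();
  long maxGen = -1;
  String current = null;
  for (int i = 0; i < files.length; i++) {
    String fileName = files[i];
    if (fileName.startsWith(IndexFileNames.SEGMENTS)
        && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
      long gen = SegmentInfos.generationFromSegmentsFileName(fileName);
      System.out.println("commit " + fileName + " (generation " + gen + ")");
      if (gen > maxGen) {
        maxGen = gen;
        current = fileName;
      }
    }
  }
  System.out.println("active commit: " + current);
}
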