| Index: CHANGES.txt |
| =================================================================== |
| --- CHANGES.txt (revision 515500) |
| +++ CHANGES.txt (working copy) |
| @@ -20,6 +20,15 @@ |
| classes, package-private again (they were unnecessarily made public |
| as part of LUCENE-701). (Mike McCandless) |
| |
| + 3. LUCENE-710: added an optional autoCommit boolean to IndexWriter |
| + constructors. When this is false, index changes are not committed |
| + until the writer is closed. This gives explicit control over when |
| + a reader will see the changes. Also added an optional custom |
| + deletion policy (IndexDeletionPolicy) to explicitly control when |
| + prior commits are removed from the index. This is intended to |
| + allow applications to share an index over NFS by customizing when |
| + prior commits are deleted. (Mike McCandless) |
| + |
| Bug fixes |
| |
| 1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist. (Doron Cohen) |
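| |
| For illustration, a minimal sketch (not part of the patch itself) of how |
| the new autoCommit flag and a custom IndexDeletionPolicy might be used |
| together. It relies only on the constructors and the |
| IndexDeletionPolicy/IndexCommitPoint methods exercised by the tests in |
| this patch; the class names here are hypothetical: |
| |
| import java.util.List; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexCommitPoint; |
| import org.apache.lucene.index.IndexDeletionPolicy; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| public class ExplicitCommitSketch { |
| |
|   // Hypothetical policy: keep only the newest commit point: |
|   static class KeepLastOnly implements IndexDeletionPolicy { |
|     public void onInit(List commits) { onCommit(commits); } |
|     public void onCommit(List commits) { |
|       for(int i=0;i<commits.size()-1;i++) { |
|         ((IndexCommitPoint) commits.get(i)).delete(); |
|       } |
|     } |
|   } |
| |
|   public static void main(String[] args) throws Exception { |
|     Directory dir = new RAMDirectory(); |
|     // autoCommit=false: readers see no changes until close() commits: |
|     IndexWriter writer = new IndexWriter(dir, false, |
|         new WhitespaceAnalyzer(), true, new KeepLastOnly()); |
|     Document doc = new Document(); |
|     doc.add(new Field("content", "aaa", Field.Store.NO, |
|         Field.Index.TOKENIZED)); |
|     writer.addDocument(doc); |
|     writer.close(); // the commit happens here |
|   } |
| } |
| |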
| Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java |
| =================================================================== |
| --- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 515500) |
| +++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (working copy) |
| @@ -68,7 +68,7 @@ |
| if (realUsage > dir.maxUsedSize) { |
| dir.maxUsedSize = realUsage; |
| } |
| - throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes"); |
| + throw new IOException("fake disk full at " + dir.getRecomputedActualSizeInBytes() + " bytes"); |
| } else { |
| super.flushBuffer(src, len); |
| } |
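| |
| The message above is thrown once a test caps the directory size. A |
| minimal sketch of how the tests in this patch drive that path, using |
| MockRAMDirectory.setMaxSizeInBytes() (a cap of 0 means unlimited, as in |
| the tests below; the class name is hypothetical): |
| |
| import java.io.IOException; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.store.MockRAMDirectory; |
| |
| public class FakeDiskFullSketch { |
|   public static void main(String[] args) throws IOException { |
|     MockRAMDirectory dir = new MockRAMDirectory(); |
|     dir.setMaxSizeInBytes(200); // tiny fake disk |
|     IndexWriter writer = new IndexWriter(dir, false, |
|         new WhitespaceAnalyzer(), true); |
|     try { |
|       for(int i=0;i<200;i++) { |
|         Document d = new Document(); |
|         d.add(new Field("content", "aaa " + i, Field.Store.NO, |
|             Field.Index.TOKENIZED)); |
|         writer.addDocument(d); |
|       } |
|     } catch (IOException e) { |
|       // expected: "fake disk full at N bytes" |
|     } |
|     dir.setMaxSizeInBytes(0); // lift the cap so close() can succeed |
|     writer.close(); |
|   } |
| } |
| |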
| Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 515500) |
| +++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) |
| @@ -25,175 +25,259 @@ |
| "Venice has lots of canals" }; |
| String[] text = { "Amsterdam", "Venice" }; |
| |
| - Directory dir = new RAMDirectory(); |
| - IndexWriter modifier = new IndexWriter(dir, |
| - new WhitespaceAnalyzer(), true); |
| - modifier.setUseCompoundFile(true); |
| - modifier.setMaxBufferedDeleteTerms(1); |
| + for(int pass=0;pass<2;pass++) { |
| + boolean autoCommit = (0==pass); |
| |
| - for (int i = 0; i < keywords.length; i++) { |
| - Document doc = new Document(); |
| - doc.add(new Field("id", keywords[i], Field.Store.YES, |
| - Field.Index.UN_TOKENIZED)); |
| - doc.add(new Field("country", unindexed[i], Field.Store.YES, |
| - Field.Index.NO)); |
| - doc.add(new Field("contents", unstored[i], Field.Store.NO, |
| - Field.Index.TOKENIZED)); |
| - doc |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer(), true); |
| + modifier.setUseCompoundFile(true); |
| + modifier.setMaxBufferedDeleteTerms(1); |
| + |
| + for (int i = 0; i < keywords.length; i++) { |
| + Document doc = new Document(); |
| + doc.add(new Field("id", keywords[i], Field.Store.YES, |
| + Field.Index.UN_TOKENIZED)); |
| + doc.add(new Field("country", unindexed[i], Field.Store.YES, |
| + Field.Index.NO)); |
| + doc.add(new Field("contents", unstored[i], Field.Store.NO, |
| + Field.Index.TOKENIZED)); |
| + doc |
| .add(new Field("city", text[i], Field.Store.YES, |
| - Field.Index.TOKENIZED)); |
| - modifier.addDocument(doc); |
| - } |
| - modifier.optimize(); |
| + Field.Index.TOKENIZED)); |
| + modifier.addDocument(doc); |
| + } |
| + modifier.optimize(); |
| |
| - Term term = new Term("city", "Amsterdam"); |
| - int hitCount = getHitCount(dir, term); |
| - assertEquals(1, hitCount); |
| - modifier.deleteDocuments(term); |
| - hitCount = getHitCount(dir, term); |
| - assertEquals(0, hitCount); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| |
| - modifier.close(); |
| + Term term = new Term("city", "Amsterdam"); |
| + int hitCount = getHitCount(dir, term); |
| + assertEquals(1, hitCount); |
| + if (!autoCommit) { |
| + modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); |
| + modifier.setUseCompoundFile(true); |
| + } |
| + modifier.deleteDocuments(term); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| + hitCount = getHitCount(dir, term); |
| + assertEquals(0, hitCount); |
| + |
| + if (autoCommit) { |
| + modifier.close(); |
| + } |
| + dir.close(); |
| + } |
| } |
| |
| // test when delete terms only apply to disk segments |
| public void testNonRAMDelete() throws IOException { |
| - Directory dir = new RAMDirectory(); |
| - IndexWriter modifier = new IndexWriter(dir, |
| - new WhitespaceAnalyzer(), true); |
| - modifier.setMaxBufferedDocs(2); |
| - modifier.setMaxBufferedDeleteTerms(2); |
| + for(int pass=0;pass<2;pass++) { |
| + boolean autoCommit = (0==pass); |
| |
| - int id = 0; |
| - int value = 100; |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer(), true); |
| + modifier.setMaxBufferedDocs(2); |
| + modifier.setMaxBufferedDeleteTerms(2); |
| |
| - for (int i = 0; i < 7; i++) { |
| - addDoc(modifier, ++id, value); |
| - } |
| - modifier.flush(); |
| + int id = 0; |
| + int value = 100; |
| |
| - assertEquals(0, modifier.getRamSegmentCount()); |
| - assertTrue(0 < modifier.getSegmentCount()); |
| + for (int i = 0; i < 7; i++) { |
| + addDoc(modifier, ++id, value); |
| + } |
| + modifier.flush(); |
| |
| - IndexReader reader = IndexReader.open(dir); |
| - assertEquals(7, reader.numDocs()); |
| - reader.close(); |
| + assertEquals(0, modifier.getRamSegmentCount()); |
| + assertTrue(0 < modifier.getSegmentCount()); |
| |
| - modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| - modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| |
| - reader = IndexReader.open(dir); |
| - assertEquals(0, reader.numDocs()); |
| - reader.close(); |
| + IndexReader reader = IndexReader.open(dir); |
| + assertEquals(7, reader.numDocs()); |
| + reader.close(); |
| |
| - modifier.close(); |
| + if (!autoCommit) { |
| + modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); |
| + modifier.setMaxBufferedDocs(2); |
| + modifier.setMaxBufferedDeleteTerms(2); |
| + } |
| + |
| + modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| + modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| + |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| + |
| + reader = IndexReader.open(dir); |
| + assertEquals(0, reader.numDocs()); |
| + reader.close(); |
| + if (autoCommit) { |
| + modifier.close(); |
| + } |
| + dir.close(); |
| + } |
| } |
| |
| // test when delete terms only apply to ram segments |
| public void testRAMDeletes() throws IOException { |
| - Directory dir = new RAMDirectory(); |
| - IndexWriter modifier = new IndexWriter(dir, |
| - new WhitespaceAnalyzer(), true); |
| - modifier.setMaxBufferedDocs(4); |
| - modifier.setMaxBufferedDeleteTerms(4); |
| + for(int pass=0;pass<2;pass++) { |
| + boolean autoCommit = (0==pass); |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer(), true); |
| + modifier.setMaxBufferedDocs(4); |
| + modifier.setMaxBufferedDeleteTerms(4); |
| |
| - int id = 0; |
| - int value = 100; |
| + int id = 0; |
| + int value = 100; |
| |
| - addDoc(modifier, ++id, value); |
| - modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| - addDoc(modifier, ++id, value); |
| - modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| + addDoc(modifier, ++id, value); |
| + modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| + addDoc(modifier, ++id, value); |
| + modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| |
| - assertEquals(2, modifier.getNumBufferedDeleteTerms()); |
| - assertEquals(1, modifier.getBufferedDeleteTermsSize()); |
| + assertEquals(2, modifier.getNumBufferedDeleteTerms()); |
| + assertEquals(1, modifier.getBufferedDeleteTermsSize()); |
| |
| - addDoc(modifier, ++id, value); |
| - assertEquals(0, modifier.getSegmentCount()); |
| - modifier.flush(); |
| + addDoc(modifier, ++id, value); |
| + assertEquals(0, modifier.getSegmentCount()); |
| + modifier.flush(); |
| |
| - IndexReader reader = IndexReader.open(dir); |
| - assertEquals(1, reader.numDocs()); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| |
| - int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); |
| - assertEquals(1, hitCount); |
| - reader.close(); |
| + IndexReader reader = IndexReader.open(dir); |
| + assertEquals(1, reader.numDocs()); |
| |
| - modifier.close(); |
| + int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); |
| + assertEquals(1, hitCount); |
| + reader.close(); |
| + if (autoCommit) { |
| + modifier.close(); |
| + } |
| + dir.close(); |
| + } |
| } |
| |
| // test when delete terms apply to both disk and ram segments |
| public void testBothDeletes() throws IOException { |
| - Directory dir = new RAMDirectory(); |
| - IndexWriter modifier = new IndexWriter(dir, |
| - new WhitespaceAnalyzer(), true); |
| - modifier.setMaxBufferedDocs(100); |
| - modifier.setMaxBufferedDeleteTerms(100); |
| + for(int pass=0;pass<2;pass++) { |
| + boolean autoCommit = (0==pass); |
| |
| - int id = 0; |
| - int value = 100; |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer(), true); |
| + modifier.setMaxBufferedDocs(100); |
| + modifier.setMaxBufferedDeleteTerms(100); |
| |
| - for (int i = 0; i < 5; i++) { |
| - addDoc(modifier, ++id, value); |
| - } |
| + int id = 0; |
| + int value = 100; |
| |
| - value = 200; |
| - for (int i = 0; i < 5; i++) { |
| - addDoc(modifier, ++id, value); |
| - } |
| - modifier.flush(); |
| + for (int i = 0; i < 5; i++) { |
| + addDoc(modifier, ++id, value); |
| + } |
| |
| - for (int i = 0; i < 5; i++) { |
| - addDoc(modifier, ++id, value); |
| - } |
| - modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| - modifier.flush(); |
| + value = 200; |
| + for (int i = 0; i < 5; i++) { |
| + addDoc(modifier, ++id, value); |
| + } |
| + modifier.flush(); |
| |
| - IndexReader reader = IndexReader.open(dir); |
| - assertEquals(5, reader.numDocs()); |
| + for (int i = 0; i < 5; i++) { |
| + addDoc(modifier, ++id, value); |
| + } |
| + modifier.deleteDocuments(new Term("value", String.valueOf(value))); |
| |
| - modifier.close(); |
| + modifier.flush(); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| + |
| + IndexReader reader = IndexReader.open(dir); |
| + assertEquals(5, reader.numDocs()); |
| + if (autoCommit) { |
| + modifier.close(); |
| + } |
| + } |
| } |
| |
| // test that batched delete terms are flushed together |
| public void testBatchDeletes() throws IOException { |
| - Directory dir = new RAMDirectory(); |
| - IndexWriter modifier = new IndexWriter(dir, |
| - new WhitespaceAnalyzer(), true); |
| - modifier.setMaxBufferedDocs(2); |
| - modifier.setMaxBufferedDeleteTerms(2); |
| + for(int pass=0;pass<2;pass++) { |
| + boolean autoCommit = (0==pass); |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer(), true); |
| + modifier.setMaxBufferedDocs(2); |
| + modifier.setMaxBufferedDeleteTerms(2); |
| |
| - int id = 0; |
| - int value = 100; |
| + int id = 0; |
| + int value = 100; |
| |
| - for (int i = 0; i < 7; i++) { |
| - addDoc(modifier, ++id, value); |
| - } |
| - modifier.flush(); |
| + for (int i = 0; i < 7; i++) { |
| + addDoc(modifier, ++id, value); |
| + } |
| + modifier.flush(); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| |
| - IndexReader reader = IndexReader.open(dir); |
| - assertEquals(7, reader.numDocs()); |
| - reader.close(); |
| + IndexReader reader = IndexReader.open(dir); |
| + assertEquals(7, reader.numDocs()); |
| + reader.close(); |
| + |
| + if (!autoCommit) { |
| + modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer()); |
| + modifier.setMaxBufferedDocs(2); |
| + modifier.setMaxBufferedDeleteTerms(2); |
| + } |
| |
| - id = 0; |
| - modifier.deleteDocuments(new Term("id", String.valueOf(++id))); |
| - modifier.deleteDocuments(new Term("id", String.valueOf(++id))); |
| + id = 0; |
| + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); |
| + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); |
| |
| - reader = IndexReader.open(dir); |
| - assertEquals(5, reader.numDocs()); |
| - reader.close(); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| |
| - Term[] terms = new Term[3]; |
| - for (int i = 0; i < terms.length; i++) { |
| - terms[i] = new Term("id", String.valueOf(++id)); |
| - } |
| - modifier.deleteDocuments(terms); |
| + reader = IndexReader.open(dir); |
| + assertEquals(5, reader.numDocs()); |
| + reader.close(); |
| |
| - reader = IndexReader.open(dir); |
| - assertEquals(2, reader.numDocs()); |
| - reader.close(); |
| + Term[] terms = new Term[3]; |
| + for (int i = 0; i < terms.length; i++) { |
| + terms[i] = new Term("id", String.valueOf(++id)); |
| + } |
| + if (!autoCommit) { |
| + modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer()); |
| + modifier.setMaxBufferedDocs(2); |
| + modifier.setMaxBufferedDeleteTerms(2); |
| + } |
| + modifier.deleteDocuments(terms); |
| + if (!autoCommit) { |
| + modifier.close(); |
| + } |
| + reader = IndexReader.open(dir); |
| + assertEquals(2, reader.numDocs()); |
| + reader.close(); |
| |
| - modifier.close(); |
| + if (autoCommit) { |
| + modifier.close(); |
| + } |
| + dir.close(); |
| + } |
| } |
| |
| private void addDoc(IndexWriter modifier, int id, int value) |
| @@ -233,201 +317,203 @@ |
| int START_COUNT = 157; |
| int END_COUNT = 144; |
| |
| - // First build up a starting index: |
| - RAMDirectory startDir = new RAMDirectory(); |
| - IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), |
| - true); |
| - for (int i = 0; i < 157; i++) { |
| - Document d = new Document(); |
| - d.add(new Field("id", Integer.toString(i), Field.Store.YES, |
| - Field.Index.UN_TOKENIZED)); |
| - d.add(new Field("content", "aaa " + i, Field.Store.NO, |
| - Field.Index.TOKENIZED)); |
| - writer.addDocument(d); |
| - } |
| - writer.close(); |
| + for(int pass=0;pass<2;pass++) { |
| + boolean autoCommit = (0==pass); |
| |
| - long diskUsage = startDir.sizeInBytes(); |
| - long diskFree = diskUsage + 10; |
| + // First build up a starting index: |
| + RAMDirectory startDir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(startDir, autoCommit, |
| + new WhitespaceAnalyzer(), true); |
| + for (int i = 0; i < 157; i++) { |
| + Document d = new Document(); |
| + d.add(new Field("id", Integer.toString(i), Field.Store.YES, |
| + Field.Index.UN_TOKENIZED)); |
| + d.add(new Field("content", "aaa " + i, Field.Store.NO, |
| + Field.Index.TOKENIZED)); |
| + writer.addDocument(d); |
| + } |
| + writer.close(); |
| |
| - IOException err = null; |
| + long diskUsage = startDir.sizeInBytes(); |
| + long diskFree = diskUsage + 10; |
| |
| - boolean done = false; |
| + IOException err = null; |
| |
| - // Iterate w/ ever increasing free disk space: |
| - while (!done) { |
| - MockRAMDirectory dir = new MockRAMDirectory(startDir); |
| - IndexWriter modifier = new IndexWriter(dir, |
| - new WhitespaceAnalyzer(), false); |
| + boolean done = false; |
| |
| - modifier.setMaxBufferedDocs(1000); // use flush or close |
| - modifier.setMaxBufferedDeleteTerms(1000); // use flush or close |
| + // Iterate w/ ever increasing free disk space: |
| + while (!done) { |
| + MockRAMDirectory dir = new MockRAMDirectory(startDir); |
| + IndexWriter modifier = new IndexWriter(dir, autoCommit, |
| + new WhitespaceAnalyzer()); |
| |
| - // For each disk size, first try to commit against |
| - // dir that will hit random IOExceptions & disk |
| - // full; after, give it infinite disk space & turn |
| - // off random IOExceptions & retry w/ same reader: |
| - boolean success = false; |
| + modifier.setMaxBufferedDocs(1000); // use flush or close |
| + modifier.setMaxBufferedDeleteTerms(1000); // use flush or close |
| |
| - for (int x = 0; x < 2; x++) { |
| + // For each disk size, first try to commit against |
| + // dir that will hit random IOExceptions & disk |
| + // full; after, give it infinite disk space & turn |
| + // off random IOExceptions & retry w/ same reader: |
| + boolean success = false; |
| |
| - double rate = 0.1; |
| - double diskRatio = ((double)diskFree) / diskUsage; |
| - long thisDiskFree; |
| - String testName; |
| + for (int x = 0; x < 2; x++) { |
| |
| - if (0 == x) { |
| - thisDiskFree = diskFree; |
| - if (diskRatio >= 2.0) { |
| - rate /= 2; |
| - } |
| - if (diskRatio >= 4.0) { |
| - rate /= 2; |
| - } |
| - if (diskRatio >= 6.0) { |
| + double rate = 0.1; |
| + double diskRatio = ((double)diskFree) / diskUsage; |
| + long thisDiskFree; |
| + String testName; |
| + |
| + if (0 == x) { |
| + thisDiskFree = diskFree; |
| + if (diskRatio >= 2.0) { |
| + rate /= 2; |
| + } |
| + if (diskRatio >= 4.0) { |
| + rate /= 2; |
| + } |
| + if (diskRatio >= 6.0) { |
| + rate = 0.0; |
| + } |
| + if (debug) { |
| + System.out.println("\ncycle: " + diskFree + " bytes"); |
| + } |
| + testName = "disk full during reader.close() @ " + thisDiskFree |
| + + " bytes"; |
| + } else { |
| + thisDiskFree = 0; |
| rate = 0.0; |
| + if (debug) { |
| + System.out.println("\ncycle: same writer: unlimited disk space"); |
| + } |
| + testName = "reader re-use after disk full"; |
| } |
| - if (debug) { |
| - System.out.println("\ncycle: " + diskFree + " bytes"); |
| + |
| + dir.setMaxSizeInBytes(thisDiskFree); |
| + dir.setRandomIOExceptionRate(rate, diskFree); |
| + |
| + try { |
| + if (0 == x) { |
| + int docId = 12; |
| + for (int i = 0; i < 13; i++) { |
| + if (updates) { |
| + Document d = new Document(); |
| + d.add(new Field("id", Integer.toString(i), Field.Store.YES, |
| + Field.Index.UN_TOKENIZED)); |
| + d.add(new Field("content", "bbb " + i, Field.Store.NO, |
| + Field.Index.TOKENIZED)); |
| + modifier.updateDocument(new Term("id", Integer.toString(docId)), d); |
| + } else { // deletes |
| + modifier.deleteDocuments(new Term("id", Integer.toString(docId))); |
| + // modifier.setNorm(docId, "contents", (float)2.0); |
| + } |
| + docId += 12; |
| + } |
| + } |
| + modifier.close(); |
| + success = true; |
| + if (0 == x) { |
| + done = true; |
| + } |
| } |
| - testName = "disk full during reader.close() @ " + thisDiskFree |
| - + " bytes"; |
| - } else { |
| - thisDiskFree = 0; |
| - rate = 0.0; |
| - if (debug) { |
| - System.out.println("\ncycle: same writer: unlimited disk space"); |
| + catch (IOException e) { |
| + if (debug) { |
| + System.out.println(" hit IOException: " + e); |
| + } |
| + err = e; |
| + if (1 == x) { |
| + e.printStackTrace(); |
| + fail(testName + " hit IOException after disk space was freed up"); |
| + } |
| } |
| - testName = "reader re-use after disk full"; |
| - } |
| |
| - dir.setMaxSizeInBytes(thisDiskFree); |
| - dir.setRandomIOExceptionRate(rate, diskFree); |
| + // Whether we succeeded or failed, check that all |
| + // un-referenced files were in fact deleted (ie, |
| + // we did not create garbage). Just create a |
| + // new IndexFileDeleter, have it delete |
| + // unreferenced files, then verify that in fact |
| + // no files were deleted: |
| + String[] startFiles = dir.list(); |
| + SegmentInfos infos = new SegmentInfos(); |
| + infos.read(dir); |
| + IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null); |
| + String[] endFiles = dir.list(); |
| |
| - try { |
| - if (0 == x) { |
| - int docId = 12; |
| - for (int i = 0; i < 13; i++) { |
| - if (updates) { |
| - Document d = new Document(); |
| - d.add(new Field("id", Integer.toString(i), Field.Store.YES, |
| - Field.Index.UN_TOKENIZED)); |
| - d.add(new Field("content", "bbb " + i, Field.Store.NO, |
| - Field.Index.TOKENIZED)); |
| - modifier.updateDocument(new Term("id", Integer.toString(docId)), d); |
| - } else { // deletes |
| - modifier.deleteDocuments(new Term("id", Integer.toString(docId))); |
| - // modifier.setNorm(docId, "contents", (float)2.0); |
| - } |
| - docId += 12; |
| + Arrays.sort(startFiles); |
| + Arrays.sort(endFiles); |
| + |
| + // for(int i=0;i<startFiles.length;i++) { |
| + // System.out.println(" startFiles: " + i + ": " + startFiles[i]); |
| + // } |
| + |
| + if (!Arrays.equals(startFiles, endFiles)) { |
| + String successStr; |
| + if (success) { |
| + successStr = "success"; |
| + } else { |
| + successStr = "IOException"; |
| + err.printStackTrace(); |
| } |
| + fail("reader.close() failed to delete unreferenced files after " |
| + + successStr + " (" + diskFree + " bytes): before delete:\n " |
| + + arrayToString(startFiles) + "\n after delete:\n " |
| + + arrayToString(endFiles)); |
| } |
| - modifier.close(); |
| - success = true; |
| - if (0 == x) { |
| - done = true; |
| + |
| + // Finally, verify index is not corrupt, and, if |
| + // we succeeded, we see all docs changed, and if |
| + // we failed, we see either all docs or no docs |
| + // changed (transactional semantics): |
| + IndexReader newReader = null; |
| + try { |
| + newReader = IndexReader.open(dir); |
| } |
| - } |
| - catch (IOException e) { |
| - if (debug) { |
| - System.out.println(" hit IOException: " + e); |
| + catch (IOException e) { |
| + e.printStackTrace(); |
| + fail(testName |
| + + ":exception when creating IndexReader after disk full during close: " |
| + + e); |
| } |
| - err = e; |
| - if (1 == x) { |
| + |
| + IndexSearcher searcher = new IndexSearcher(newReader); |
| + Hits hits = null; |
| + try { |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + } |
| + catch (IOException e) { |
| e.printStackTrace(); |
| - fail(testName + " hit IOException after disk space was freed up"); |
| + fail(testName + ": exception when searching: " + e); |
| } |
| - } |
| - |
| - // Whether we succeeded or failed, check that all |
| - // un-referenced files were in fact deleted (ie, |
| - // we did not create garbage). Just create a |
| - // new IndexFileDeleter, have it delete |
| - // unreferenced files, then verify that in fact |
| - // no files were deleted: |
| - String[] startFiles = dir.list(); |
| - SegmentInfos infos = new SegmentInfos(); |
| - infos.read(dir); |
| - IndexFileDeleter d = new IndexFileDeleter(infos, dir); |
| - d.findDeletableFiles(); |
| - d.deleteFiles(); |
| - String[] endFiles = dir.list(); |
| - |
| - Arrays.sort(startFiles); |
| - Arrays.sort(endFiles); |
| - |
| - // for(int i=0;i<startFiles.length;i++) { |
| - // System.out.println(" startFiles: " + i + ": " + startFiles[i]); |
| - // } |
| - |
| - if (!Arrays.equals(startFiles, endFiles)) { |
| - String successStr; |
| + int result2 = hits.length(); |
| if (success) { |
| - successStr = "success"; |
| + if (result2 != END_COUNT) { |
| + fail(testName |
| + + ": method did not throw exception but hits.length for search on term 'aaa' is " |
| + + result2 + " instead of expected " + END_COUNT); |
| + } |
| } else { |
| - successStr = "IOException"; |
| - err.printStackTrace(); |
| + // On hitting exception we still may have added |
| + // all docs: |
| + if (result2 != START_COUNT && result2 != END_COUNT) { |
| + err.printStackTrace(); |
| + fail(testName |
| + + ": method did throw exception but hits.length for search on term 'aaa' is " |
| + + result2 + " instead of expected " + START_COUNT); |
| + } |
| } |
| - fail("reader.close() failed to delete unreferenced files after " |
| - + successStr + " (" + diskFree + " bytes): before delete:\n " |
| - + arrayToString(startFiles) + "\n after delete:\n " |
| - + arrayToString(endFiles)); |
| - } |
| |
| - // Finally, verify index is not corrupt, and, if |
| - // we succeeded, we see all docs changed, and if |
| - // we failed, we see either all docs or no docs |
| - // changed (transactional semantics): |
| - IndexReader newReader = null; |
| - try { |
| - newReader = IndexReader.open(dir); |
| - } |
| - catch (IOException e) { |
| - e.printStackTrace(); |
| - fail(testName |
| - + ":exception when creating IndexReader after disk full during close: " |
| - + e); |
| - } |
| + searcher.close(); |
| + newReader.close(); |
| |
| - IndexSearcher searcher = new IndexSearcher(newReader); |
| - Hits hits = null; |
| - try { |
| - hits = searcher.search(new TermQuery(searchTerm)); |
| - } |
| - catch (IOException e) { |
| - e.printStackTrace(); |
| - fail(testName + ": exception when searching: " + e); |
| - } |
| - int result2 = hits.length(); |
| - if (success) { |
| - if (result2 != END_COUNT) { |
| - fail(testName |
| - + ": method did not throw exception but hits.length for search on term 'aaa' is " |
| - + result2 + " instead of expected " + END_COUNT); |
| + if (result2 == END_COUNT) { |
| + break; |
| } |
| - } else { |
| - // On hitting exception we still may have added |
| - // all docs: |
| - if (result2 != START_COUNT && result2 != END_COUNT) { |
| - err.printStackTrace(); |
| - fail(testName |
| - + ": method did throw exception but hits.length for search on term 'aaa' is " |
| - + result2 + " instead of expected " + START_COUNT); |
| - } |
| } |
| |
| - searcher.close(); |
| - newReader.close(); |
| + dir.close(); |
| |
| - if (result2 == END_COUNT) { |
| - break; |
| - } |
| + // Try again with 10 more bytes of free space: |
| + diskFree += 10; |
| } |
| - |
| - dir.close(); |
| - |
| - // Try again with 10 more bytes of free space: |
| - diskFree += 10; |
| } |
| } |
| |
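| |
| The close/reopen choreography in the tests above exists because, with |
| autoCommit=false, buffered deletes only become visible at the commit |
| performed by close(). A compact sketch of that visibility rule (class |
| name hypothetical): |
| |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| public class DeleteVisibilitySketch { |
|   public static void main(String[] args) throws Exception { |
|     Directory dir = new RAMDirectory(); |
|     IndexWriter writer = new IndexWriter(dir, true, |
|         new WhitespaceAnalyzer(), true); |
|     Document doc = new Document(); |
|     doc.add(new Field("city", "Amsterdam", Field.Store.YES, |
|         Field.Index.TOKENIZED)); |
|     writer.addDocument(doc); |
|     writer.close(); |
| |
|     // Re-open with autoCommit=false; the delete is buffered: |
|     writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); |
|     writer.deleteDocuments(new Term("city", "Amsterdam")); |
| |
|     IndexReader before = IndexReader.open(dir); |
|     System.out.println(before.numDocs()); // 1: not committed yet |
|     before.close(); |
| |
|     writer.close(); // commit on close |
| |
|     IndexReader after = IndexReader.open(dir); |
|     System.out.println(after.numDocs()); // 0: delete now visible |
|     after.close(); |
|   } |
| } |
| |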
| Index: src/test/org/apache/lucene/index/TestIndexReader.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestIndexReader.java (revision 515500) |
| +++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy) |
| @@ -802,9 +802,7 @@ |
| String[] startFiles = dir.list(); |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| - IndexFileDeleter d = new IndexFileDeleter(infos, dir); |
| - d.findDeletableFiles(); |
| - d.deleteFiles(); |
| + IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null); |
| String[] endFiles = dir.list(); |
| |
| Arrays.sort(startFiles); |
| Index: src/test/org/apache/lucene/index/TestIndexWriter.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 515500) |
| +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) |
| @@ -71,7 +71,7 @@ |
| reader.close(); |
| |
| // optimize the index and check that the new doc count is correct |
| - writer = new IndexWriter(dir, new WhitespaceAnalyzer()); |
| + writer = new IndexWriter(dir, true, new WhitespaceAnalyzer()); |
| writer.optimize(); |
| assertEquals(60, writer.docCount()); |
| writer.close(); |
| @@ -163,7 +163,7 @@ |
| // addIndexes will certainly run out of space & |
| // fail. Verify that when this happens, index is |
| // not corrupt and index in fact has added no |
| - // documents. Then, we increase disk space by 1000 |
| + // documents. Then, we increase disk space by 2000 |
| // bytes each iteration. At some point there is |
| // enough free disk space and addIndexes should |
| // succeed and index should show all documents were |
| @@ -178,11 +178,14 @@ |
| startDiskUsage += startDir.fileLength(files[i]); |
| } |
| |
| - for(int method=0;method<3;method++) { |
| + for(int iter=0;iter<6;iter++) { |
| |
| // Start with 100 bytes more than we are currently using: |
| long diskFree = diskUsage+100; |
| |
| + boolean autoCommit = iter % 2 == 0; |
| + int method = iter/2; |
| + |
| boolean success = false; |
| boolean done = false; |
| |
| @@ -195,7 +198,7 @@ |
| methodName = "addIndexesNoOptimize(Directory[])"; |
| } |
| |
| - String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes"; |
| + String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes with autoCommit = " + autoCommit; |
| |
| int cycleCount = 0; |
| |
| @@ -205,7 +208,7 @@ |
| |
| // Make a new dir that will enforce disk usage: |
| MockRAMDirectory dir = new MockRAMDirectory(startDir); |
| - writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); |
| IOException err = null; |
| |
| for(int x=0;x<2;x++) { |
| @@ -285,38 +288,27 @@ |
| } |
| } |
| |
| - // Whether we succeeded or failed, check that all |
| - // un-referenced files were in fact deleted (ie, |
| - // we did not create garbage). Just create a |
| - // new IndexFileDeleter, have it delete |
| - // unreferenced files, then verify that in fact |
| - // no files were deleted: |
| - String[] startFiles = dir.list(); |
| - SegmentInfos infos = new SegmentInfos(); |
| - infos.read(dir); |
| - IndexFileDeleter d = new IndexFileDeleter(infos, dir); |
| - d.findDeletableFiles(); |
| - d.deleteFiles(); |
| - String[] endFiles = dir.list(); |
| + if (autoCommit) { |
| |
| - Arrays.sort(startFiles); |
| - Arrays.sort(endFiles); |
| + // Whether we succeeded or failed, check that |
| + // all un-referenced files were in fact |
| + // deleted (ie, we did not create garbage). |
| + // Only check this when autoCommit is true: |
| + // when it's false, it's expected that there |
| + // are unreferenced files (ie they won't be |
| + // referenced until the "commit on close"). |
| + // Just create a new IndexFileDeleter, have it |
| + // delete unreferenced files, then verify that |
| + // in fact no files were deleted: |
| |
| - /* |
| - for(int i=0;i<startFiles.length;i++) { |
| - System.out.println(" " + i + ": " + startFiles[i]); |
| - } |
| - */ |
| - |
| - if (!Arrays.equals(startFiles, endFiles)) { |
| String successStr; |
| if (success) { |
| successStr = "success"; |
| } else { |
| successStr = "IOException"; |
| - err.printStackTrace(); |
| } |
| - fail(methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles)); |
| + String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)"; |
| + assertNoUnreferencedFiles(dir, message); |
| } |
| |
| if (debug) { |
| @@ -335,8 +327,10 @@ |
| } |
| int result = reader.docFreq(searchTerm); |
| if (success) { |
| - if (result != END_COUNT) { |
| + if (autoCommit && result != END_COUNT) { |
| fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT); |
| + } else if (!autoCommit && result != START_COUNT) { |
| + fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]"); |
| } |
| } else { |
| // On hitting exception we still may have added |
| @@ -374,31 +368,107 @@ |
| System.out.println(" count is " + result); |
| } |
| |
| - if (result == END_COUNT) { |
| + if (done || result == END_COUNT) { |
| break; |
| } |
| } |
| |
| - // Javadocs state that temp free Directory space |
| - // required is at most 2X total input size of |
| - // indices so let's make sure: |
| - assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName + |
| - ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " + |
| - "starting disk usage = " + startDiskUsage + " bytes; " + |
| - "input index disk usage = " + inputDiskUsage + " bytes", |
| - (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage)); |
| + if (debug) { |
| + System.out.println(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.getMaxUsedSizeInBytes()); |
| + } |
| |
| + if (done) { |
| + // Javadocs state that temp free Directory space |
| + // required is at most 2X total input size of |
| + // indices so let's make sure: |
| + assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName + |
| + ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " + |
| + "starting disk usage = " + startDiskUsage + " bytes; " + |
| + "input index disk usage = " + inputDiskUsage + " bytes", |
| + (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage)); |
| + } |
| + |
| writer.close(); |
| dir.close(); |
| |
| - // Try again with 1000 more bytes of free space: |
| - diskFree += 1000; |
| + // Try again with 2000 more bytes of free space: |
| + diskFree += 2000; |
| } |
| } |
| |
| startDir.close(); |
| } |
| |
| + /* |
| + * Make sure IndexWriter cleans up on hitting a disk |
| + * full exception in addDocument. |
| + */ |
| + public void testAddDocumentOnDiskFull() throws IOException { |
| + |
| + for(int pass=0;pass<3;pass++) { |
| + boolean autoCommit = pass == 0; |
| + boolean doAbort = pass == 2; |
| + long diskFree = 200; |
| + while(true) { |
| + MockRAMDirectory dir = new MockRAMDirectory(); |
| + dir.setMaxSizeInBytes(diskFree); |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); |
| + boolean hitError = false; |
| + try { |
| + for(int i=0;i<200;i++) { |
| + addDoc(writer); |
| + } |
| + } catch (IOException e) { |
| + // e.printStackTrace(); |
| + hitError = true; |
| + } |
| + |
| + if (hitError) { |
| + if (doAbort) { |
| + writer.abort(); |
| + } else { |
| + try { |
| + writer.close(); |
| + } catch (IOException e) { |
| + // e.printStackTrace(); |
| + dir.setMaxSizeInBytes(0); |
| + writer.close(); |
| + } |
| + } |
| + |
| + assertNoUnreferencedFiles(dir, "after disk full during addDocument with autoCommit=" + autoCommit); |
| + |
| + // Make sure reader can open the index: |
| + IndexReader.open(dir).close(); |
| + |
| + dir.close(); |
| + |
| + // Now try again w/ more space: |
| + diskFree += 500; |
| + } else { |
| + dir.close(); |
| + break; |
| + } |
| + } |
| + } |
| + |
| + } |
| + |
| + public void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { |
| + String[] startFiles = dir.list(); |
| + SegmentInfos infos = new SegmentInfos(); |
| + infos.read(dir); |
| + IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null); |
| + String[] endFiles = dir.list(); |
| + |
| + Arrays.sort(startFiles); |
| + Arrays.sort(endFiles); |
| + |
| + if (!Arrays.equals(startFiles, endFiles)) { |
| + fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles)); |
| + } |
| + } |
| + |
| /** |
| * Make sure optimize doesn't use any more than 1X |
| * starting index size as its temporary free space |
| @@ -694,6 +764,205 @@ |
| } |
| } |
| |
| + /* |
| + * Simple test for "commit on close": open writer with |
| + * autoCommit=false, so it will only commit on close, |
| + * then add a bunch of docs, making sure reader does not |
| + * see these docs until writer is closed. |
| + */ |
| + public void testCommitOnClose() throws IOException { |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); |
| + for (int i = 0; i < 14; i++) { |
| + addDoc(writer); |
| + } |
| + writer.close(); |
| + |
| + Term searchTerm = new Term("content", "aaa"); |
| + IndexSearcher searcher = new IndexSearcher(dir); |
| + Hits hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("first number of hits", 14, hits.length()); |
| + searcher.close(); |
| + |
| + IndexReader reader = IndexReader.open(dir); |
| + |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); |
| + for(int i=0;i<3;i++) { |
| + for(int j=0;j<11;j++) { |
| + addDoc(writer); |
| + } |
| + searcher = new IndexSearcher(dir); |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("reader incorrectly sees changes from writer with autoCommit disabled", 14, hits.length()); |
| + searcher.close(); |
| + assertTrue("reader should have still been current", reader.isCurrent()); |
| + } |
| + |
| + // Now, close the writer: |
| + writer.close(); |
| + assertFalse("reader should not be current now", reader.isCurrent()); |
| + |
| + searcher = new IndexSearcher(dir); |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("reader did not see changes after writer was closed", 47, hits.length()); |
| + searcher.close(); |
| + } |
| + |
| + /* |
| + * Simple test for "commit on close": open writer with |
| + * autoCommit=false, so it will only commit on close, |
| + * then add a bunch of docs, making sure reader does not |
| + * see them until writer has closed. Then instead of |
| + * closing the writer, call abort and verify reader sees |
| + * nothing was added. Then verify we can open the index |
| + * and add docs to it. |
| + */ |
| + public void testCommitOnCloseAbort() throws IOException { |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); |
| + for (int i = 0; i < 14; i++) { |
| + addDoc(writer); |
| + } |
| + writer.close(); |
| + |
| + Term searchTerm = new Term("content", "aaa"); |
| + IndexSearcher searcher = new IndexSearcher(dir); |
| + Hits hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("first number of hits", 14, hits.length()); |
| + searcher.close(); |
| + |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); |
| + for(int j=0;j<17;j++) { |
| + addDoc(writer); |
| + } |
| + // Delete all docs: |
| + writer.deleteDocuments(searchTerm); |
| + |
| + searcher = new IndexSearcher(dir); |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("reader incorrectly sees changes from writer with autoCommit disabled", 14, hits.length()); |
| + searcher.close(); |
| + |
| + // Now, close the writer: |
| + writer.abort(); |
| + |
| + assertNoUnreferencedFiles(dir, "unreferenced files remain after abort()"); |
| + |
| + searcher = new IndexSearcher(dir); |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("saw changes after writer.abort", 14, hits.length()); |
| + searcher.close(); |
| + |
| + // Now make sure we can re-open the index, add docs, |
| + // and all is good: |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); |
| + for(int i=0;i<12;i++) { |
| + for(int j=0;j<17;j++) { |
| + addDoc(writer); |
| + } |
| + searcher = new IndexSearcher(dir); |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("reader incorrectly sees changes from writer with autoCommit disabled", 14, hits.length()); |
| + searcher.close(); |
| + } |
| + |
| + writer.close(); |
| + searcher = new IndexSearcher(dir); |
| + hits = searcher.search(new TermQuery(searchTerm)); |
| + assertEquals("didn't see changes after close", 218, hits.length()); |
| + searcher.close(); |
| + |
| + dir.close(); |
| + } |
| + |
| + /* |
| + * Verify that a writer with "commit on close" indeed |
| + * cleans up the temp segments created after opening |
| + * that are not referenced by the starting segments |
| + * file. We check this by using MockRAMDirectory to |
| + * measure max temp disk space used. |
| + */ |
| + public void testCommitOnCloseDiskUsage() throws IOException { |
| + MockRAMDirectory dir = new MockRAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); |
| + for(int j=0;j<30;j++) { |
| + addDocWithIndex(writer, j); |
| + } |
| + writer.close(); |
| + dir.resetMaxUsedSizeInBytes(); |
| + |
| + long startDiskUsage = dir.getMaxUsedSizeInBytes(); |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); |
| + for(int j=0;j<1470;j++) { |
| + addDocWithIndex(writer, j); |
| + } |
| + long midDiskUsage = dir.getMaxUsedSizeInBytes(); |
| + dir.resetMaxUsedSizeInBytes(); |
| + writer.optimize(); |
| + writer.close(); |
| + long endDiskUsage = dir.getMaxUsedSizeInBytes(); |
| + |
| + // Ending index is 50X as large as starting index; due |
| + // to 2X disk usage normally we allow 100X max |
| + // transient usage. If something is wrong w/ deleter |
| + // and it doesn't delete intermediate segments then it |
| + // will exceed this 100X: |
| + // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage); |
| + assertTrue("writer used to much space while adding documents when autoCommit=false", |
| + midDiskUsage < 100*startDiskUsage); |
| + assertTrue("writer used to much space after close when autoCommit=false", |
| + endDiskUsage < 100*startDiskUsage); |
| + } |
| + |
| + |
| + /* |
| + * Verify that calling optimize when writer is open for |
| + * "commit on close" works correctly both for abort() |
| + * and close(). |
| + */ |
| + public void testCommitOnCloseOptimize() throws IOException { |
| + RAMDirectory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); |
| + for(int j=0;j<17;j++) { |
| + addDocWithIndex(writer, j); |
| + } |
| + writer.close(); |
| + |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); |
| + writer.optimize(); |
| + |
| + // Open a reader before closing (committing) the writer: |
| + IndexReader reader = IndexReader.open(dir); |
| + |
| + // Reader should see index as unoptimized at this |
| + // point: |
| + assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized()); |
| + reader.close(); |
| + |
| + // Abort the writer: |
| + writer.abort(); |
| + assertNoUnreferencedFiles(dir, "aborted writer after optimize"); |
| + |
| + // Open a reader after aborting writer: |
| + reader = IndexReader.open(dir); |
| + |
| + // Reader should still see index as unoptimized: |
| + assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized()); |
| + reader.close(); |
| + |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); |
| + writer.optimize(); |
| + writer.close(); |
| + assertNoUnreferencedFiles(dir, "aborted writer after optimize"); |
| + |
| + // Open a reader after aborting writer: |
| + reader = IndexReader.open(dir); |
| + |
| + // Reader should still see index as unoptimized: |
| + assertTrue("Reader incorrectly sees that the index is unoptimized", reader.isOptimized()); |
| + reader.close(); |
| + } |
| + |
| // Make sure that a Directory implementation that does |
| // not use LockFactory at all (ie overrides makeLock and |
| // implements its own private locking) works OK. This |
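| |
| The testCommitOnClose additions above key off IndexReader.isCurrent(), |
| which should flip only when the writer actually commits. A short sketch |
| of that behavior (class name hypothetical): |
| |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| public class IsCurrentSketch { |
|   public static void main(String[] args) throws Exception { |
|     Directory dir = new RAMDirectory(); |
|     new IndexWriter(dir, new WhitespaceAnalyzer(), true).close(); |
| |
|     IndexReader reader = IndexReader.open(dir); |
|     IndexWriter writer = new IndexWriter(dir, false, |
|         new WhitespaceAnalyzer()); |
|     Document doc = new Document(); |
|     doc.add(new Field("content", "aaa", Field.Store.NO, |
|         Field.Index.TOKENIZED)); |
|     writer.addDocument(doc); |
| |
|     System.out.println(reader.isCurrent()); // true: no commit yet |
|     writer.close();                         // commit |
|     System.out.println(reader.isCurrent()); // false: new commit exists |
|     reader.close(); |
|   } |
| } |
| |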
| Index: src/test/org/apache/lucene/index/TestIndexFileDeleter.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestIndexFileDeleter.java (revision 515500) |
| +++ src/test/org/apache/lucene/index/TestIndexFileDeleter.java (working copy) |
| @@ -173,6 +173,8 @@ |
| out.writeBytes(b, len); |
| remainder -= len; |
| } |
| + in.close(); |
| + out.close(); |
| } |
| |
| private void addDoc(IndexWriter writer, int id) throws IOException |
| Index: src/test/org/apache/lucene/index/TestDeletionPolicy.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 0) |
| +++ src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 0) |
| @@ -0,0 +1,618 @@ |
| +package org.apache.lucene.index; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import junit.framework.TestCase; |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.search.Query; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.Hits; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import java.util.List; |
| +import java.util.Iterator; |
| +import java.util.Set; |
| +import java.util.HashSet; |
| + |
| +/* |
| + Verify that custom IndexDeletionPolicy implementations control |
| + which commit points are kept and deleted, with autoCommit both |
| + true and false. |
| +*/ |
| + |
| +public class TestDeletionPolicy extends TestCase |
| +{ |
| + private void verifyCommitOrder(List commits) { |
| + long last = SegmentInfos.generationFromSegmentsFileName(((IndexCommitPoint) commits.get(0)).getSegmentsFileName()); |
| + for(int i=1;i<commits.size();i++) { |
| + long now = SegmentInfos.generationFromSegmentsFileName(((IndexCommitPoint) commits.get(i)).getSegmentsFileName()); |
| + assertTrue("SegmentInfos commits are out-of-order", now > last); |
| + last = now; |
| + } |
| + } |
| + |
| + class KeepAllDeletionPolicy implements IndexDeletionPolicy { |
| + int numOnInit; |
| + int numOnCommit; |
| + public void onInit(List commits) { |
| + verifyCommitOrder(commits); |
| + numOnInit++; |
| + } |
| + public void onCommit(List commits) { |
| + verifyCommitOrder(commits); |
| + numOnCommit++; |
| + } |
| + } |
| + |
| + /** |
| + * This is useful for adding to a big index w/ autoCommit |
| + * false when you know readers are not using it. |
| + */ |
| + class KeepNoneOnInitDeletionPolicy implements IndexDeletionPolicy { |
| + int numOnInit; |
| + int numOnCommit; |
| + public void onInit(List commits) { |
| + verifyCommitOrder(commits); |
| + numOnInit++; |
| + // On init, delete all commit points: |
| + Iterator it = commits.iterator(); |
| + while(it.hasNext()) { |
| + ((IndexCommitPoint) it.next()).delete(); |
| + } |
| + } |
| + public void onCommit(List commits) { |
| + verifyCommitOrder(commits); |
| + int size = commits.size(); |
| + // Delete all but last one: |
| + for(int i=0;i<size-1;i++) { |
| + ((IndexCommitPoint) commits.get(i)).delete(); |
| + } |
| + numOnCommit++; |
| + } |
| + } |
| + |
| + class KeepLastNDeletionPolicy implements IndexDeletionPolicy { |
| + int numOnInit; |
| + int numOnCommit; |
| + int numToKeep; |
| + int numDelete; |
| + Set seen = new HashSet(); |
| + |
| + public KeepLastNDeletionPolicy(int numToKeep) { |
| + this.numToKeep = numToKeep; |
| + } |
| + |
| + public void onInit(List commits) { |
| + verifyCommitOrder(commits); |
| + numOnInit++; |
| + // doDeletes still prunes old commits on init; "false" |
| + // just skips the onCommit bookkeeping: |
| + doDeletes(commits, false); |
| + } |
| + |
| + public void onCommit(List commits) { |
| + verifyCommitOrder(commits); |
| + doDeletes(commits, true); |
| + } |
| + |
| + private void doDeletes(List commits, boolean isCommit) { |
| + |
| + // Assert that we really are only called for each new |
| + // commit: |
| + if (isCommit) { |
| + String fileName = ((IndexCommitPoint) commits.get(commits.size()-1)).getSegmentsFileName(); |
| + if (seen.contains(fileName)) { |
| + throw new RuntimeException("onCommit was called twice on the same commit point: " + fileName); |
| + } |
| + seen.add(fileName); |
| + numOnCommit++; |
| + } |
| + int size = commits.size(); |
| + for(int i=0;i<size-numToKeep;i++) { |
| + ((IndexCommitPoint) commits.get(i)).delete(); |
| + numDelete++; |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * Delete a commit only when it has been obsoleted by N |
| + * seconds. |
| + */ |
| + class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy { |
| + |
| + Directory dir; |
| + double expirationTimeSeconds; |
| + int numDelete; |
| + |
| + public ExpirationTimeDeletionPolicy(Directory dir, double seconds) { |
| + this.dir = dir; |
| + this.expirationTimeSeconds = seconds; |
| + } |
| + |
| + public void onInit(List commits) throws IOException { |
| + verifyCommitOrder(commits); |
| + onCommit(commits); |
| + } |
| + |
| + public void onCommit(List commits) throws IOException { |
| + verifyCommitOrder(commits); |
| + |
| + IndexCommitPoint lastCommit = (IndexCommitPoint) commits.get(commits.size()-1); |
| + |
| + // Any commit older than expireTime should be deleted: |
| + double expireTime = dir.fileModified(lastCommit.getSegmentsFileName())/1000.0 - expirationTimeSeconds; |
| + |
| + Iterator it = commits.iterator(); |
| + |
| + while(it.hasNext()) { |
| + IndexCommitPoint commit = (IndexCommitPoint) it.next(); |
| + double modTime = dir.fileModified(commit.getSegmentsFileName())/1000.0; |
| + if (commit != lastCommit && modTime < expireTime) { |
| + commit.delete(); |
| + numDelete += 1; |
| + } |
| + } |
| + } |
| + } |
| + |
| + /* |
| + * Test "by time expiration" deletion policy: |
| + */ |
| + public void testExpirationTimeDeletionPolicy() throws IOException, InterruptedException { |
| + |
| + final double SECONDS = 2.0; |
| + |
| + boolean autoCommit = false; |
| + boolean useCompoundFile = true; |
| + |
| + Directory dir = new RAMDirectory(); |
| + ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(dir, SECONDS); |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + writer.close(); |
| + |
| + for(int i=0;i<7;i++) { |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + for(int j=0;j<17;j++) { |
| + addDoc(writer); |
| + } |
| + writer.close(); |
| + |
| + // Make sure to sleep long enough so that some commit |
| + // points will be deleted: |
| + Thread.sleep((int) (1000.0*(SECONDS/5.0))); |
| + } |
| + |
| + // First, make sure the policy in fact deleted something: |
| + assertTrue("no commits were deleted", policy.numDelete > 0); |
| + |
| + // Then a simplistic check: verify that the segments_N's |
| + // that still exist are within SECONDS seconds of the |
| + // newest one's mod time, and that we can open a reader |
| + // on each: |
| + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| + |
| + String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, |
| + "", |
| + gen); |
| + long newestModTime = dir.fileModified(fileName); |
| + |
| + while(gen > 0) { |
| + try { |
| + IndexReader reader = IndexReader.open(dir); |
| + reader.close(); |
| + fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, |
| + "", |
| + gen); |
| + long modTime = dir.fileModified(fileName); |
| + assertTrue("commit point was older than " + SECONDS + " seconds but did not get deleted", newestModTime - modTime < (SECONDS*1000)); |
| + } catch (IOException e) { |
| + // OK |
| + break; |
| + } |
| + |
| + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); |
| + gen--; |
| + } |
| + |
| + dir.close(); |
| + } |
| + |
| + /* |
| + * Test a silly deletion policy that keeps all commits around. |
| + */ |
| + public void testKeepAllDeletionPolicy() throws IOException { |
| + |
| + for(int pass=0;pass<4;pass++) { |
| + |
| + boolean autoCommit = pass < 2; |
| + boolean useCompoundFile = (pass % 2) > 0; |
| + |
| + KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(); |
| + |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + for(int i=0;i<107;i++) { |
| + addDoc(writer); |
| + } |
| + writer.close(); |
| + |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + assertEquals(2, policy.numOnInit); |
| + if (autoCommit) { |
| + assertTrue(policy.numOnCommit > 2); |
| + } else { |
| + // If we are not auto committing then there should |
| + // be exactly 2 commits (one per close above): |
| + assertEquals(2, policy.numOnCommit); |
| + } |
| + |
| + // Simplistic check: just verify all segments_N's still |
| + // exist, and that we can open a reader on each: |
| + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| + while(gen > 0) { |
| + IndexReader reader = IndexReader.open(dir); |
| + reader.close(); |
| + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); |
| + gen--; |
| + |
| + if (gen > 0) { |
| + // Removing a commit point should have orphaned at |
| + // least one index file. Open & close a writer and |
| + // assert that it actually removed something: |
| + int preCount = dir.list().length; |
| + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, policy); |
| + writer.close(); |
| + int postCount = dir.list().length; |
| + assertTrue(postCount < preCount); |
| + } |
| + } |
| + |
| + dir.close(); |
| + } |
| + } |
| + |
| + /* Test keeping NO commit points. This is a viable and |
| + * useful case eg where you want to build a big index with |
| + * autoCommit false and you know there are no readers. |
| + */ |
| + public void testKeepNoneOnInitDeletionPolicy() throws IOException { |
| + |
| + for(int pass=0;pass<4;pass++) { |
| + |
| + boolean autoCommit = pass < 2; |
| + boolean useCompoundFile = (pass % 2) > 0; |
| + |
| + KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(); |
| + |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + for(int i=0;i<107;i++) { |
| + addDoc(writer); |
| + } |
| + writer.close(); |
| + |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + assertEquals(2, policy.numOnInit); |
| + if (autoCommit) { |
| + assertTrue(policy.numOnCommit > 2); |
| + } else { |
| + // If we are not auto committing then there should |
| + // be exactly 2 commits (one per close above): |
| + assertEquals(2, policy.numOnCommit); |
| + } |
| + |
| + // Simplistic check: just verify the index is in fact |
| + // readable: |
| + IndexReader reader = IndexReader.open(dir); |
| + reader.close(); |
| + |
| + dir.close(); |
| + } |
| + } |
| + |
| + /* |
| + * Test a deletion policy that keeps last N commits. |
| + */ |
| + public void testKeepLastNDeletionPolicy() throws IOException { |
| + |
| + final int N = 5; |
| + |
| + for(int pass=0;pass<4;pass++) { |
| + |
| + boolean autoCommit = pass < 2; |
| + boolean useCompoundFile = (pass % 2) > 0; |
| + |
| + Directory dir = new RAMDirectory(); |
| + |
| + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); |
| + |
| + for(int j=0;j<N+1;j++) { |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + for(int i=0;i<17;i++) { |
| + addDoc(writer); |
| + } |
| + writer.optimize(); |
| + writer.close(); |
| + } |
| + |
| + assertTrue(policy.numDelete > 0); |
| + assertEquals(N+1, policy.numOnInit); |
| + if (autoCommit) { |
| + assertTrue(policy.numOnCommit > 1); |
| + } else { |
| + assertEquals(N+1, policy.numOnCommit); |
| + } |
| + |
| + // Simplistic check: verify that only the last N segments_N's |
| + // still exist, and that we can open a reader on each: |
| + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| + for(int i=0;i<N+1;i++) { |
| + try { |
| + IndexReader reader = IndexReader.open(dir); |
| + reader.close(); |
| + if (i == N) { |
| + fail("should have failed on commits prior to last " + N); |
| + } |
| + } catch (IOException e) { |
| + if (i != N) { |
| + throw e; |
| + } |
| + } |
| + if (i < N) { |
| + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); |
| + } |
| + gen--; |
| + } |
| + |
| + dir.close(); |
| + } |
| + } |
| + |
| + /* |
| + * Test a deletion policy that keeps last N commits |
| + * around, with reader doing deletes. |
| + */ |
| + public void testKeepLastNDeletionPolicyWithReader() throws IOException { |
| + |
| + final int N = 10; |
| + |
| + for(int pass=0;pass<4;pass++) { |
| + |
| + boolean autoCommit = pass < 2; |
| + boolean useCompoundFile = (pass % 2) > 0; |
| + |
| + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); |
| + |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + writer.close(); |
| + Term searchTerm = new Term("content", "aaa"); |
| + Query query = new TermQuery(searchTerm); |
| + |
| + for(int i=0;i<N+1;i++) { |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + for(int j=0;j<17;j++) { |
| + addDoc(writer); |
| + } |
| + // this is a commit when autoCommit=false: |
| + writer.close(); |
| + IndexReader reader = IndexReader.open(dir, policy); |
| + reader.deleteDocument(3*i+1); |
| + reader.setNorm(4*i+1, "content", 2.0F); |
| + IndexSearcher searcher = new IndexSearcher(reader); |
| + Hits hits = searcher.search(query); |
| + assertEquals(16*(1+i), hits.length()); |
| + // this is a commit when autoCommit=false: |
| + reader.close(); |
| + searcher.close(); |
| + } |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + writer.optimize(); |
| + // this is a commit when autoCommit=false: |
| + writer.close(); |
| + |
| + assertEquals(2*(N+2), policy.numOnInit); |
| + if (autoCommit) { |
| + assertTrue(policy.numOnCommit > 2*(N+2)-1); |
| + } else { |
| + assertEquals(2*(N+2)-1, policy.numOnCommit); |
| + } |
| + |
| + IndexSearcher searcher = new IndexSearcher(dir); |
| + Hits hits = searcher.search(query); |
| + assertEquals(176, hits.length()); |
| + |
| + // Simplistic check: just verify that only the last N segments_N's still |
| + // exist, and that we can open a reader on each: |
| + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| + |
| + int expectedCount = 176; |
| + |
| + for(int i=0;i<N+1;i++) { |
| + try { |
| + IndexReader reader = IndexReader.open(dir); |
| + |
| + // Work backwards through the commits, computing |
| + // the expected hit count at each one. Only check |
| + // this in the autoCommit false case: |
| + if (!autoCommit) { |
| + searcher = new IndexSearcher(reader); |
| + hits = searcher.search(query); |
| + if (i > 1) { |
| + if (i % 2 == 0) { |
| + expectedCount += 1; |
| + } else { |
| + expectedCount -= 17; |
| + } |
| + } |
| + assertEquals(expectedCount, hits.length()); |
| + searcher.close(); |
| + } |
| + reader.close(); |
| + if (i == N) { |
| + fail("should have failed on commits before last 5"); |
| + } |
| + } catch (IOException e) { |
| + if (i != N) { |
| + throw e; |
| + } |
| + } |
| + if (i < N) { |
| + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); |
| + } |
| + gen--; |
| + } |
| + |
| + dir.close(); |
| + } |
| + } |
| + |
| + /* |
| + * Test a deletion policy that keeps last N commits |
| + * around, through creates. |
| + */ |
| + public void testKeepLastNDeletionPolicyWithCreates() throws IOException { |
| + |
| + final int N = 10; |
| + |
| + for(int pass=0;pass<4;pass++) { |
| + |
| + boolean autoCommit = pass < 2; |
| + boolean useCompoundFile = (pass % 2) > 0; |
| + |
| + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); |
| + |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + writer.close(); |
| + Term searchTerm = new Term("content", "aaa"); |
| + Query query = new TermQuery(searchTerm); |
| + |
| + for(int i=0;i<N+1;i++) { |
| + |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); |
| + writer.setUseCompoundFile(useCompoundFile); |
| + for(int j=0;j<17;j++) { |
| + addDoc(writer); |
| + } |
| + // this is a commit when autoCommit=false: |
| + writer.close(); |
| + IndexReader reader = IndexReader.open(dir, policy); |
| + reader.deleteDocument(3); |
| + reader.setNorm(5, "content", 2.0F); |
| + IndexSearcher searcher = new IndexSearcher(reader); |
| + Hits hits = searcher.search(query); |
| + assertEquals(16, hits.length()); |
| + // this is a commit when autoCommit=false: |
| + reader.close(); |
| + searcher.close(); |
| + |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); |
| + // This will not commit: there are no changes |
| + // pending because we opened for "create": |
| + writer.close(); |
| + } |
| + |
| + assertEquals(1+3*(N+1), policy.numOnInit); |
| + if (autoCommit) { |
| + assertTrue(policy.numOnCommit > 3*(N+1)-1); |
| + } else { |
| + assertEquals(2*(N+1), policy.numOnCommit); |
| + } |
| + |
| + IndexSearcher searcher = new IndexSearcher(dir); |
| + Hits hits = searcher.search(query); |
| + assertEquals(0, hits.length()); |
| + |
| + // Simplistic check: just verify that only the last N segments_N's still |
| + // exist, and that we can open a reader on each: |
| + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| + |
| + int expectedCount = 0; |
| + |
| + for(int i=0;i<N+1;i++) { |
| + try { |
| + IndexReader reader = IndexReader.open(dir); |
| + |
| + // Work backwards through the commits, computing |
| + // the expected hit count at each one. Only check |
| + // this in the autoCommit false case: |
| + if (!autoCommit) { |
| + searcher = new IndexSearcher(reader); |
| + hits = searcher.search(query); |
| + assertEquals(expectedCount, hits.length()); |
| + searcher.close(); |
| + if (expectedCount == 0) { |
| + expectedCount = 16; |
| + } else if (expectedCount == 16) { |
| + expectedCount = 17; |
| + } else if (expectedCount == 17) { |
| + expectedCount = 0; |
| + } |
| + } |
| + reader.close(); |
| + if (i == N) { |
| + fail("should have failed on commits before last " + N); |
| + } |
| + } catch (IOException e) { |
| + if (i != N) { |
| + throw e; |
| + } |
| + } |
| + if (i < N) { |
| + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); |
| + } |
| + gen--; |
| + } |
| + |
| + dir.close(); |
| + } |
| + } |
| + |
| + private void addDoc(IndexWriter writer) throws IOException |
| + { |
| + Document doc = new Document(); |
| + doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); |
| + writer.addDocument(doc); |
| + } |
| +} |
| |
| Property changes on: src/test/org/apache/lucene/index/TestDeletionPolicy.java |
| ___________________________________________________________________ |
| Name: svn:eol-style |
| + native |
| |
| Index: src/test/org/apache/lucene/index/TestBackwardsCompatibility.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 515500) |
| +++ src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) |
| @@ -21,7 +21,7 @@ |
| import java.util.zip.*; |
| |
| /* |
| - Verify we can read the pre-XXX file format, do searches |
| + Verify we can read the pre-2.1 file format, do searches |
| against it, and add documents to it. |
| */ |
| |
| @@ -104,8 +104,12 @@ |
| for(int i=0;i<oldNames.length;i++) { |
| String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i]; |
| unzip(dirName, oldNames[i]); |
| - changeIndexNoAdds(oldNames[i]); |
| + changeIndexNoAdds(oldNames[i], true); |
| rmDir(oldNames[i]); |
| + |
| + unzip(dirName, oldNames[i]); |
| + changeIndexNoAdds(oldNames[i], false); |
| + rmDir(oldNames[i]); |
| } |
| } |
| |
| @@ -114,8 +118,12 @@ |
| for(int i=0;i<oldNames.length;i++) { |
| String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i]; |
| unzip(dirName, oldNames[i]); |
| - changeIndexWithAdds(oldNames[i]); |
| + changeIndexWithAdds(oldNames[i], true); |
| rmDir(oldNames[i]); |
| + |
| + unzip(dirName, oldNames[i]); |
| + changeIndexWithAdds(oldNames[i], false); |
| + rmDir(oldNames[i]); |
| } |
| } |
| |
| @@ -141,13 +149,14 @@ |
| |
| /* Open pre-lockless index, add docs, do a delete & |
| * setNorm, and search */ |
| - public void changeIndexWithAdds(String dirName) throws IOException { |
| + public void changeIndexWithAdds(String dirName, boolean autoCommit) throws IOException { |
| |
| dirName = fullDir(dirName); |
| |
| Directory dir = FSDirectory.getDirectory(dirName); |
| + |
| // open writer |
| - IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); |
| |
| // add 10 docs |
| for(int i=0;i<10;i++) { |
| @@ -166,7 +175,7 @@ |
| assertEquals("wrong first document", "21", d.get("id")); |
| searcher.close(); |
| |
| - // make sure we can do another delete & another setNorm against this |
| + // make sure we can do delete & setNorm against this |
| // pre-lockless segment: |
| IndexReader reader = IndexReader.open(dir); |
| Term searchTerm = new Term("id", "6"); |
| @@ -175,7 +184,7 @@ |
| reader.setNorm(22, "content", (float) 2.0); |
| reader.close(); |
| |
| - // make sure 2nd delete & 2nd norm "took": |
| + // make sure they "took": |
| searcher = new IndexSearcher(dir); |
| hits = searcher.search(new TermQuery(new Term("content", "aaa"))); |
| assertEquals("wrong number of hits", 43, hits.length()); |
| @@ -184,7 +193,7 @@ |
| searcher.close(); |
| |
| // optimize |
| - writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); |
| + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); |
| writer.optimize(); |
| writer.close(); |
| |
| @@ -200,7 +209,7 @@ |
| |
| /* Open pre-lockless index, add docs, do a delete & |
| * setNorm, and search */ |
| - public void changeIndexNoAdds(String dirName) throws IOException { |
| + public void changeIndexNoAdds(String dirName, boolean autoCommit) throws IOException { |
| |
| dirName = fullDir(dirName); |
| |
| @@ -214,7 +223,7 @@ |
| assertEquals("wrong first document", "21", d.get("id")); |
| searcher.close(); |
| |
| - // make sure we can do another delete & another setNorm against this |
| + // make sure we can do a delete & setNorm against this |
| // pre-lockless segment: |
| IndexReader reader = IndexReader.open(dir); |
| Term searchTerm = new Term("id", "6"); |
| @@ -223,7 +232,7 @@ |
| reader.setNorm(22, "content", (float) 2.0); |
| reader.close(); |
| |
| - // make sure 2nd delete & 2nd norm "took": |
| + // make sure they "took": |
| searcher = new IndexSearcher(dir); |
| hits = searcher.search(new TermQuery(new Term("content", "aaa"))); |
| assertEquals("wrong number of hits", 33, hits.length()); |
| @@ -232,7 +241,7 @@ |
| searcher.close(); |
| |
| // optimize |
| - IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); |
| writer.optimize(); |
| writer.close(); |
| |
| @@ -273,66 +282,78 @@ |
| |
| /* Verifies that the expected file names were produced */ |
| |
| - // disable until hardcoded file names are fixes: |
| public void testExactFileNames() throws IOException { |
| |
| - String outputDir = "lucene.backwardscompat0.index"; |
| - Directory dir = FSDirectory.getDirectory(fullDir(outputDir)); |
| - IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); |
| - for(int i=0;i<35;i++) { |
| - addDoc(writer, i); |
| - } |
| - assertEquals("wrong doc count", 35, writer.docCount()); |
| - writer.close(); |
| + for(int pass=0;pass<2;pass++) { |
| |
| - // Delete one doc so we get a .del file: |
| - IndexReader reader = IndexReader.open(dir); |
| - Term searchTerm = new Term("id", "7"); |
| - int delCount = reader.deleteDocuments(searchTerm); |
| - assertEquals("didn't delete the right number of documents", 1, delCount); |
| + String outputDir = "lucene.backwardscompat0.index"; |
| |
| - // Set one norm so we get a .s0 file: |
| - reader.setNorm(21, "content", (float) 1.5); |
| - reader.close(); |
| + try { |
| + Directory dir = FSDirectory.getDirectory(fullDir(outputDir)); |
| |
| - // The numbering of fields can vary depending on which |
| - // JRE is in use. On some JREs we see content bound to |
| - // field 0; on others, field 1. So, here we have to |
| - // figure out which field number corresponds to |
| - // "content", and then set our expected file names below |
| - // accordingly: |
| - CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs"); |
| - FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm"); |
| - int contentFieldIndex = -1; |
| - for(int i=0;i<fieldInfos.size();i++) { |
| - FieldInfo fi = fieldInfos.fieldInfo(i); |
| - if (fi.name.equals("content")) { |
| - contentFieldIndex = i; |
| - break; |
| - } |
| - } |
| - cfsReader.close(); |
| - assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1); |
| + boolean autoCommit = 0 == pass; |
| + |
| + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); |
| + for(int i=0;i<35;i++) { |
| + addDoc(writer, i); |
| + } |
| + assertEquals("wrong doc count", 35, writer.docCount()); |
| + writer.close(); |
| |
| - // Now verify file names: |
| - String[] expected = {"_0.cfs", |
| - "_0_1.del", |
| - "_1.cfs", |
| - "_2.cfs", |
| - "_2_1.s" + contentFieldIndex, |
| - "_3.cfs", |
| - "segments_a", |
| - "segments.gen"}; |
| + // Delete one doc so we get a .del file: |
| + IndexReader reader = IndexReader.open(dir); |
| + Term searchTerm = new Term("id", "7"); |
| + int delCount = reader.deleteDocuments(searchTerm); |
| + assertEquals("didn't delete the right number of documents", 1, delCount); |
| |
| - String[] actual = dir.list(); |
| - Arrays.sort(expected); |
| - Arrays.sort(actual); |
| - if (!Arrays.equals(expected, actual)) { |
| - fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual)); |
| + // Set one norm so we get a .s0 file: |
| + reader.setNorm(21, "content", (float) 1.5); |
| + reader.close(); |
| + |
| + // The numbering of fields can vary depending on which |
| + // JRE is in use. On some JREs we see content bound to |
| + // field 0; on others, field 1. So, here we have to |
| + // figure out which field number corresponds to |
| + // "content", and then set our expected file names below |
| + // accordingly: |
| + CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs"); |
| + FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm"); |
| + int contentFieldIndex = -1; |
| + for(int i=0;i<fieldInfos.size();i++) { |
| + FieldInfo fi = fieldInfos.fieldInfo(i); |
| + if (fi.name.equals("content")) { |
| + contentFieldIndex = i; |
| + break; |
| + } |
| + } |
| + cfsReader.close(); |
| + assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1); |
| + |
| + // Now verify file names: |
| + String[] expected = {"_0.cfs", |
| + "_0_1.del", |
| + "_1.cfs", |
| + "_2.cfs", |
| + "_2_1.s" + contentFieldIndex, |
| + "_3.cfs", |
| + "segments_a", |
| + "segments.gen"}; |
| + if (!autoCommit) { |
| + expected[6] = "segments_3"; |
| + } |
| + |
| + String[] actual = dir.list(); |
| + Arrays.sort(expected); |
| + Arrays.sort(actual); |
| + if (!Arrays.equals(expected, actual)) { |
| + fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual)); |
| + } |
| + dir.close(); |
| + } finally { |
| + rmDir(outputDir); |
| + } |
| } |
| - dir.close(); |
| - |
| - rmDir(outputDir); |
| } |
| |
| private String asString(String[] l) { |
| Index: src/java/org/apache/lucene/index/IndexDeletionPolicy.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexDeletionPolicy.java (revision 0) |
| +++ src/java/org/apache/lucene/index/IndexDeletionPolicy.java (revision 0) |
| @@ -0,0 +1,83 @@ |
| +package org.apache.lucene.index; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.util.List; |
| +import java.io.IOException; |
| + |
| +/** |
| + * <p>Expert: implement this interface, and pass it to one |
| + * of the {@link IndexWriter} or {@link IndexReader} |
| + * constructors, to customize when "point in time" commits |
| + * are deleted from an index. The default deletion policy |
| + * is {@link KeepOnlyLastCommitDeletionPolicy}, which always |
| + * removes old commits as soon as a new commit is done (this |
| + * matches the behavior before 2.2).</p> |
| + * |
| + * <p>One expected use case for this (and the reason why it |
| + * was first created) is to work around problems with an |
| + * index directory accessed via filesystems like NFS because |
| + * NFS does not provide the "delete on last close" semantics |
| + * that Lucene's "point in time" search normally relies on. |
| + * By implementing a custom deletion policy, such as "a |
| + * commit is only removed once it has been stale for more |
| + * than X minutes", you can give your readers time to |
| + * refresh to the new commit before {@link IndexWriter} |
| + * removes the old commits. Note that doing so will |
| + * increase the storage requirements of the index. See <a |
| + * target="top" |
| + * href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a> |
| + * for details.</p> |
| + */ |
| + |
| +public interface IndexDeletionPolicy { |
| + |
| + /** |
| + * <p>This is called once when a writer is first |
| + * instantiated to give the policy a chance to remove old |
| + * commit points.</p> |
| + * |
| + * <p>The writer locates all commits present in the index |
| + * and calls this method. The policy may choose to delete |
| + * commit points. To delete a commit point, call the |
| + * {@link IndexCommitPoint#delete} method.</p> |
| + * |
| + * @param commits List of {@link IndexCommitPoint}, |
| + * sorted by age (the 0th one is the oldest commit). |
| + */ |
| + public void onInit(List commits) throws IOException; |
| + |
| + /** |
| + * <p>This is called each time the writer commits. This |
| + * gives the policy a chance to remove old commit points |
| + * with each commit.</p> |
| + * |
| + * <p>If the writer has <code>autoCommit = true</code> then |
| + * this method will in general be called many times during |
| + * one instance of {@link IndexWriter}. If |
| + * <code>autoCommit = false</code> then this method is |
| + * only called once, when {@link IndexWriter#close} is |
| + * called, or not at all if {@link IndexWriter#abort} is |
| + * called. The policy may then choose to delete old |
| + * commit points by calling {@link IndexCommitPoint#delete}.</p> |
| + * |
| + * @param commits List of {@link IndexCommitPoint}, |
| + * sorted by age (the 0th one is the oldest commit). |
| + */ |
| + public void onCommit(List commits) throws IOException; |
| +} |
| |
| Property changes on: src/java/org/apache/lucene/index/IndexDeletionPolicy.java |
| ___________________________________________________________________ |
| Name: svn:eol-style |
| + native |
| |
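| The javadoc above describes the NFS use case ("a commit is only removed |
| once it has been stale for more than X minutes"). A minimal policy along |
| those lines might look like the sketch below. This is illustrative only, |
| not part of the patch: it assumes the policy is handed the index |
| Directory so it can read each commit's segments file modification time |
| via Directory.fileModified. |
| |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.List; |
| import org.apache.lucene.index.IndexCommitPoint; |
| import org.apache.lucene.index.IndexDeletionPolicy; |
| import org.apache.lucene.store.Directory; |
| |
| /** Sketch: delete a commit only once it has been stale longer |
|  *  than expirationTimeSeconds. */ |
| class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy { |
|   private final Directory dir; |
|   private final double expirationTimeSeconds; |
| |
|   ExpirationTimeDeletionPolicy(Directory dir, double seconds) { |
|     this.dir = dir; |
|     this.expirationTimeSeconds = seconds; |
|   } |
| |
|   public void onInit(List commits) throws IOException { |
|     onCommit(commits); |
|   } |
| |
|   public void onCommit(List commits) throws IOException { |
|     // The last commit in the list is the newest; never delete it. |
|     IndexCommitPoint last = (IndexCommitPoint) commits.get(commits.size()-1); |
|     double expireTime = dir.fileModified(last.getSegmentsFileName())/1000.0 |
|       - expirationTimeSeconds; |
|     Iterator it = commits.iterator(); |
|     while (it.hasNext()) { |
|       IndexCommitPoint commit = (IndexCommitPoint) it.next(); |
|       double modTime = dir.fileModified(commit.getSegmentsFileName())/1000.0; |
|       if (commit != last && modTime < expireTime) { |
|         commit.delete(); |
|       } |
|     } |
|   } |
| } |
| |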
| Index: src/java/org/apache/lucene/index/MultiReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/MultiReader.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) |
| @@ -220,13 +220,6 @@ |
| return new MultiTermPositions(subReaders, starts); |
| } |
| |
| - protected void setDeleter(IndexFileDeleter deleter) { |
| - // Share deleter to our SegmentReaders: |
| - this.deleter = deleter; |
| - for (int i = 0; i < subReaders.length; i++) |
| - subReaders[i].setDeleter(deleter); |
| - } |
| - |
| protected void doCommit() throws IOException { |
| for (int i = 0; i < subReaders.length; i++) |
| subReaders[i].commit(); |
| Index: src/java/org/apache/lucene/index/IndexReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexReader.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) |
| @@ -114,7 +114,7 @@ |
| private Directory directory; |
| private boolean directoryOwner; |
| private boolean closeDirectory; |
| - protected IndexFileDeleter deleter; |
| + private IndexDeletionPolicy deletionPolicy; |
| private boolean isClosed; |
| |
| private SegmentInfos segmentInfos; |
| @@ -131,30 +131,44 @@ |
| path. |
| * @throws CorruptIndexException if the index is corrupt |
| * @throws IOException if there is a low-level IO error |
| - */ |
| + * @param path the path to the index directory */ |
| public static IndexReader open(String path) throws CorruptIndexException, IOException { |
| - return open(FSDirectory.getDirectory(path), true); |
| + return open(FSDirectory.getDirectory(path), true, null); |
| } |
| |
| /** Returns an IndexReader reading the index in an FSDirectory in the named |
| - path. |
| + * path. |
| * @throws CorruptIndexException if the index is corrupt |
| * @throws IOException if there is a low-level IO error |
| - */ |
| + * @param path the path to the index directory */ |
| public static IndexReader open(File path) throws CorruptIndexException, IOException { |
| - return open(FSDirectory.getDirectory(path), true); |
| + return open(FSDirectory.getDirectory(path), true, null); |
| } |
| |
| /** Returns an IndexReader reading the index in the given Directory. |
| * @throws CorruptIndexException if the index is corrupt |
| * @throws IOException if there is a low-level IO error |
| + * @param directory the index directory |
| */ |
| public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { |
| - return open(directory, false); |
| + return open(directory, false, null); |
| } |
| |
| - private static IndexReader open(final Directory directory, final boolean closeDirectory) throws CorruptIndexException, IOException { |
| + /** Expert: returns an IndexReader reading the index in the given |
| + * Directory, with a custom {@link IndexDeletionPolicy}. |
| + * @param directory the index directory |
| + * @param deletionPolicy a custom deletion policy (only used |
| + * if you use this reader to perform deletes or to set |
| + * norms); see {@link IndexWriter} for details. |
| + * @throws CorruptIndexException if the index is corrupt |
| + * @throws IOException if there is a low-level IO error |
| + */ |
| + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { |
| + return open(directory, false, deletionPolicy); |
| + } |
| |
| + private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { |
| + |
| return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) { |
| |
| protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException { |
| @@ -162,8 +176,10 @@ |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(directory, segmentFileName); |
| |
| + IndexReader reader; |
| + |
| if (infos.size() == 1) { // index is optimized |
| - return SegmentReader.get(infos, infos.info(0), closeDirectory); |
| + reader = SegmentReader.get(infos, infos.info(0), closeDirectory); |
| } else { |
| |
| // To reduce the chance of hitting FileNotFound |
| @@ -184,8 +200,10 @@ |
| } |
| } |
| |
| - return new MultiReader(directory, infos, closeDirectory, readers); |
| + reader = new MultiReader(directory, infos, closeDirectory, readers); |
| } |
| + reader.deletionPolicy = deletionPolicy; |
| + return reader; |
| } |
| }.run(); |
| } |
| @@ -715,20 +733,14 @@ |
| */ |
| protected final synchronized void commit() throws IOException { |
| if(hasChanges){ |
| - if (deleter == null) { |
| - // In the MultiReader case, we share this deleter |
| - // across all SegmentReaders: |
| - setDeleter(new IndexFileDeleter(segmentInfos, directory)); |
| - } |
| if(directoryOwner){ |
| |
| - // Should not be necessary: no prior commit should |
| - // have left pending files, so just defensive: |
| - deleter.clearPendingFiles(); |
| + // Default deleter (for backwards compatibility) is |
| + // KeepOnlyLastCommitDeleter: |
| + IndexFileDeleter deleter = new IndexFileDeleter(directory, |
| + deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, |
| + segmentInfos, null); |
| |
| - String oldInfoFileName = segmentInfos.getCurrentSegmentFileName(); |
| - String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); |
| - |
| // Checkpoint the state we are about to change, in |
| // case we have to roll back: |
| startCommit(); |
| @@ -749,24 +761,16 @@ |
| // actually in the index): |
| rollbackCommit(); |
| |
| - // Erase any pending files that we were going to delete: |
| - deleter.clearPendingFiles(); |
| - |
| - // Remove possibly partially written next |
| - // segments file: |
| - deleter.deleteFile(nextSegmentsFileName); |
| - |
| // Recompute deletable files & remove them (so |
| // partially written .del files, etc, are |
| // removed): |
| - deleter.findDeletableFiles(); |
| - deleter.deleteFiles(); |
| + deleter.refresh(); |
| } |
| } |
| |
| - // Attempt to delete all files we just obsoleted: |
| - deleter.deleteFile(oldInfoFileName); |
| - deleter.commitPendingFiles(); |
| + // Have the deleter remove any now unreferenced |
| + // files due to this commit: |
| + deleter.checkpoint(segmentInfos, true); |
| |
| if (writeLock != null) { |
| writeLock.release(); // release write lock |
| @@ -779,13 +783,6 @@ |
| hasChanges = false; |
| } |
| |
| - protected void setDeleter(IndexFileDeleter deleter) { |
| - this.deleter = deleter; |
| - } |
| - protected IndexFileDeleter getDeleter() { |
| - return deleter; |
| - } |
| - |
| /** Implements commit. */ |
| protected abstract void doCommit() throws IOException; |
| |
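| The new open(Directory, IndexDeletionPolicy) variant matters when the |
| reader itself commits (via deletes or setNorm). A short usage sketch; |
| someCustomPolicy is an assumed policy instance shared with the writer so |
| the reader's commit obeys the same retention rules: |
| |
| // The reader's close() commits its deletes; routing that commit |
| // through the shared policy keeps retention behavior consistent. |
| IndexReader reader = IndexReader.open(dir, someCustomPolicy); |
| reader.deleteDocuments(new Term("id", "7")); |
| reader.close(); // this commit consults someCustomPolicy |
| |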
| Index: src/java/org/apache/lucene/index/IndexCommitPoint.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0) |
| +++ src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0) |
| @@ -0,0 +1,41 @@ |
| +package org.apache.lucene.index; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/** |
| + * Represents a single commit into an index as seen by the |
| + * {@link IndexDeletionPolicy}. |
| + */ |
| + |
| +public interface IndexCommitPoint { |
| + |
| + /** |
| + * Get the segments file (i.e., <code>segments_N</code>) of |
| + * this commit point. |
| + */ |
| + public String getSegmentsFileName(); |
| + |
| + /** |
| + * Notify the writer that this commit point should be |
| + * deleted. This should only be called by the {@link |
| + * IndexDeletionPolicy} during its {@link |
| + * IndexDeletionPolicy#onInit} or {@link |
| + * IndexDeletionPolicy#onCommit} method. |
| + */ |
| + public void delete(); |
| +} |
| |
| Property changes on: src/java/org/apache/lucene/index/IndexCommitPoint.java |
| ___________________________________________________________________ |
| Name: svn:eol-style |
| + native |
| |
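| The default policy named in the javadoc, KeepOnlyLastCommitDeletionPolicy, |
| is added elsewhere in this patch (its source is not shown in this |
| excerpt). Its documented behavior implies something like this sketch, |
| which keeps only the newest commit: |
| |
| import java.util.List; |
| import org.apache.lucene.index.IndexCommitPoint; |
| import org.apache.lucene.index.IndexDeletionPolicy; |
| |
| /** Sketch of "delete all commits but the newest" (illustrative). */ |
| class KeepOnlyLastCommitSketch implements IndexDeletionPolicy { |
|   public void onInit(List commits) { |
|     onCommit(commits); |
|   } |
|   public void onCommit(List commits) { |
|     // Commits are sorted oldest first; delete all but the last. |
|     int size = commits.size(); |
|     for (int i = 0; i < size-1; i++) { |
|       ((IndexCommitPoint) commits.get(i)).delete(); |
|     } |
|   } |
| } |
| |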
| Index: src/java/org/apache/lucene/index/IndexFileNames.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) |
| @@ -37,7 +37,19 @@ |
| |
| /** Extension of norms file */ |
| static final String NORMS_EXTENSION = "nrm"; |
| - |
| + |
| + /** Extension of compound file */ |
| + static final String COMPOUND_FILE_EXTENSION = "cfs"; |
| + |
| + /** Extension of deletes */ |
| + static final String DELETES_EXTENSION = "del"; |
| + |
| + /** Extension of single norms */ |
| + static final String SINGLE_NORMS_EXTENSION = "f"; |
| + |
| + /** Extension of separate norms */ |
| + static final String SEPARATE_NORMS_EXTENSION = "s"; |
| + |
| /** |
| * This array contains all filename extensions used by |
| * Lucene's index files, with two exceptions, namely the |
| @@ -50,6 +62,13 @@ |
| "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", |
| "tvx", "tvd", "tvf", "gen", "nrm" |
| }; |
| + |
| + /** File extensions that are added to a compound file |
| + * (same as above, minus "del", "gen", "cfs"). */ |
| + static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] { |
| + "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", |
| + "tvx", "tvd", "tvf", "nrm" |
| + }; |
| |
| /** File extensions of old-style index files */ |
| static final String COMPOUND_EXTENSIONS[] = new String[] { |
| Index: src/java/org/apache/lucene/index/SegmentInfos.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) |
| @@ -88,16 +88,9 @@ |
| for (int i = 0; i < files.length; i++) { |
| String file = files[i]; |
| if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) { |
| - if (file.equals(IndexFileNames.SEGMENTS)) { |
| - // Pre lock-less commits: |
| - if (max == -1) { |
| - max = 0; |
| - } |
| - } else { |
| - long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX); |
| - if (v > max) { |
| - max = v; |
| - } |
| + long gen = generationFromSegmentsFileName(file); |
| + if (gen > max) { |
| + max = gen; |
| } |
| } |
| } |
| @@ -152,6 +145,22 @@ |
| } |
| |
| /** |
| + * Parse the generation off the segments file name and |
| + * return it. |
| + */ |
| + public static long generationFromSegmentsFileName(String fileName) { |
| + if (fileName.equals(IndexFileNames.SEGMENTS)) { |
| + return 0; |
| + } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) { |
| + return Long.parseLong(fileName.substring(1+IndexFileNames.SEGMENTS.length()), |
| + Character.MAX_RADIX); |
| + } else { |
| + throw new IllegalArgumentException("fileName \"" + fileName + "\" is not a segments file"); |
| + } |
| + } |
| + |
| + |
| + /** |
| * Get the next segments_N filename that will be written. |
| */ |
| public String getNextSegmentFileName() { |
| @@ -181,12 +190,8 @@ |
| |
| IndexInput input = directory.openInput(segmentFileName); |
| |
| - if (segmentFileName.equals(IndexFileNames.SEGMENTS)) { |
| - generation = 0; |
| - } else { |
| - generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()), |
| - Character.MAX_RADIX); |
| - } |
| + generation = generationFromSegmentsFileName(segmentFileName); |
| + |
| lastGeneration = generation; |
| |
| try { |
| @@ -255,6 +260,8 @@ |
| |
| IndexOutput output = directory.createOutput(segmentFileName); |
| |
| + boolean success = false; |
| + |
| try { |
| output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT |
| output.writeLong(++version); // every write changes |
| @@ -266,7 +273,16 @@ |
| } |
| } |
| finally { |
| - output.close(); |
| + try { |
| + output.close(); |
| + success = true; |
| + } finally { |
| + if (!success) { |
| + // Try not to leave a truncated segments_N file in |
| + // the index: |
| + directory.deleteFile(segmentFileName); |
| + } |
| + } |
| } |
| |
| try { |
| @@ -305,6 +321,9 @@ |
| public long getVersion() { |
| return version; |
| } |
| + public long getGeneration() { |
| + return generation; |
| + } |
| |
| /** |
| * Current version number from segments file. |
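| |
| As generationFromSegmentsFileName above shows, the _N suffix of a |
| segments file is encoded in Character.MAX_RADIX (36). A quick round-trip |
| sketch (illustrative, not part of the patch); this is why the |
| backwards-compatibility test expects "segments_a" for generation 10: |
| |
| long gen = 10; |
| String fileName = "segments_" + Long.toString(gen, Character.MAX_RADIX); |
| // fileName is "segments_a" |
| long parsed = Long.parseLong(fileName.substring("segments_".length()), |
|                              Character.MAX_RADIX); // parsed == 10 |
| |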
| Index: src/java/org/apache/lucene/index/IndexWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexWriter.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) |
| @@ -29,48 +29,100 @@ |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| -import java.util.Vector; |
| -import java.util.HashSet; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.Map.Entry; |
| |
| /** |
| - An IndexWriter creates and maintains an index. |
| + An <code>IndexWriter</code> creates and maintains an index. |
| |
| - <p>The third argument (<code>create</code>) to the |
| + <p>The <code>create</code> argument to the |
| <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a> |
| determines whether a new index is created, or whether an existing index is |
| - opened for the addition of new documents. Note that you |
| - can open an index with create=true even while readers are |
| + opened. Note that you |
| + can open an index with <code>create=true</code> even while readers are |
| using the index. The old readers will continue to search |
| the "point in time" snapshot they had opened, and won't |
| - see the newly created index until they re-open.</p> |
| + see the newly created index until they re-open. There are |
| + also <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a> |
| + with no <code>create</code> argument which |
| + will create a new index if there is not already an index at the |
| + provided path and otherwise open the existing index.</p> |
| |
| - <p>In either case, documents are added with the <a |
| - href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method. |
| - When finished adding documents, <a href="#close()"><b>close</b></a> should be called.</p> |
| + <p>In either case, documents are added with <a |
| + href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> |
| + and removed with <a |
| + href="#deleteDocuments(org.apache.lucene.index.Term)"><b>deleteDocuments</b></a>. |
| + A document can be updated with <a href="#updateDocument(org.apache.lucene.index.Term, org.apache.lucene.document.Document)"><b>updateDocument</b></a> |
| + (which just deletes and then adds). When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p> |
| |
| + <p>These changes are buffered in memory and periodically |
| + flushed to the {@link Directory} (during the above method calls). A flush is triggered when there are |
| + enough buffered deletes (see {@link |
| + #setMaxBufferedDeleteTerms}) or enough added documents |
| + (see {@link #setMaxBufferedDocs}) since the last flush, |
| + whichever is sooner. When a flush occurs, both pending |
| + deletes and added documents are flushed to the index. A |
| + flush may also trigger one or more segment merges.</p> |
| + |
| + <a name="autoCommit"></a> |
| + <p>The optional <code>autoCommit</code> argument to the |
| + <a href="#IndexWriter(org.apache.lucene.store.Directory, boolean, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a> |
| + controls visibility of the changes to {@link IndexReader} instances reading the same index. |
| + When this is <code>false</code>, changes are not |
| + visible until {@link #close()} is called. |
| + Note that changes will still be flushed to the |
| + {@link org.apache.lucene.store.Directory} as new files, |
| + but are not committed (no new <code>segments_N</code> file |
| + is written referencing the new files) until {@link #close} is |
| + called. If something goes terribly wrong (for example the |
| + JVM crashes) before {@link #close()}, then |
| + the index will reflect none of the changes made (it will |
| + remain in its starting state). |
| + You can also call {@link #abort()}, which closes the writer without committing any |
| + changes, and removes any index |
| + files that had been flushed but are now unreferenced. |
| + This mode is useful for preventing readers from refreshing |
| + at a bad time (for example after you've done all your |
| + deletes but before you've done your adds). |
| + It can also be used to implement simple single-writer |
| + transactional semantics ("all or none").</p> |
| + |
| + <p>When <code>autoCommit</code> is <code>true</code> then |
| + every flush is also a commit ({@link IndexReader} |
| + instances will see each flush as changes to the index). |
| + This is the default, to match the behavior before 2.2. |
| + When running in this mode, be careful not to refresh your |
| + readers while optimize or segment merges are taking place |
| + as this can tie up substantial disk space.</p> |
| + |
| <p>If an index will not have more documents added for a while and optimal search |
| performance is desired, then the <a href="#optimize()"><b>optimize</b></a> |
| method should be called before the index is closed.</p> |
| - |
| - <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open |
| - another IndexWriter on the same directory will lead to a |
| + |
| + <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open |
| + another <code>IndexWriter</code> on the same directory will lead to a |
| {@link LockObtainFailedException}. The {@link LockObtainFailedException} |
| is also thrown if an IndexReader on the same directory is used to delete documents |
| from the index.</p> |
| |
| - <p>As of <b>2.1</b>, IndexWriter can now delete documents |
| - by {@link Term} (see {@link #deleteDocuments} ) and update |
| - (delete then add) documents (see {@link #updateDocument}). |
| - Deletes are buffered until {@link |
| - #setMaxBufferedDeleteTerms} <code>Terms</code> at which |
| - point they are flushed to the index. Note that a flush |
| - occurs when there are enough buffered deletes or enough |
| - added documents, whichever is sooner. When a flush |
| - occurs, both pending deletes and added documents are |
| - flushed to the index.</p> |
| + <a name="deletionPolicy"></a> |
| + <p>Expert: <code>IndexWriter</code> allows an optional |
| + {@link IndexDeletionPolicy} implementation to be |
| + specified. You can use this to control when prior commits |
| + are deleted from the index. The default policy is {@link |
| + KeepOnlyLastCommitDeletionPolicy} which removes all prior |
| + commits as soon as a new commit is done (this matches |
| + behavior before 2.2). Creating your own policy can allow |
| + you to explicitly keep previous "point in time" commits |
| + alive in the index for some time, to allow readers to |
| + refresh to the new commit without having the old commit |
| + deleted out from under them. This is necessary on |
| + filesystems like NFS that do not support "delete on last |
| + close" semantics, which Lucene's "point in time" search |
| + normally relies on. </p> |
| */ |
| |
| public class IndexWriter { |
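| |
| Taken together, the autoCommit and deletionPolicy javadoc above suggests |
| usage along the following lines. This is a sketch with error handling |
| simplified; dir, analyzer and doc are assumed to be in scope: |
| |
| // Buffer all changes, then publish them in a single commit at close. |
| IndexWriter writer = new IndexWriter(dir, false, analyzer); |
| boolean success = false; |
| try { |
|   writer.deleteDocuments(new Term("id", "42")); |
|   writer.addDocument(doc); |
|   writer.close();   // the one commit: readers now see all changes |
|   success = true; |
| } finally { |
|   if (!success) { |
|     writer.abort(); // roll back; index is left as it was when opened |
|   } |
| } |
| |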
| @@ -83,6 +135,9 @@ |
| |
| private long writeLockTimeout = WRITE_LOCK_TIMEOUT; |
| |
| + /** |
| + * Name of the write lock in the index. |
| + */ |
| public static final String WRITE_LOCK_NAME = "write.lock"; |
| |
| /** |
| @@ -120,11 +175,13 @@ |
| |
| private Similarity similarity = Similarity.getDefault(); // how to normalize |
| |
| - private boolean inTransaction = false; // true iff we are in a transaction |
| private boolean commitPending; // true if segmentInfos has changes not yet committed |
| - private HashSet protectedSegments; // segment names that should not be deleted until commit |
| private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails |
| |
| + private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails |
| + private boolean localAutoCommit; // saved autoCommit during local transaction |
| + private boolean autoCommit = true; // false if we should commit only on close |
| + |
| SegmentInfos segmentInfos = new SegmentInfos(); // the segments |
| SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory |
| private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs |
| @@ -238,7 +295,7 @@ |
| */ |
| public IndexWriter(String path, Analyzer a, boolean create) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - init(path, a, create); |
| + init(FSDirectory.getDirectory(path), a, create, true, null, true); |
| } |
| |
| /** |
| @@ -263,7 +320,7 @@ |
| */ |
| public IndexWriter(File path, Analyzer a, boolean create) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - init(path, a, create); |
| + init(FSDirectory.getDirectory(path), a, create, true, null, true); |
| } |
| |
| /** |
| @@ -288,14 +345,14 @@ |
| */ |
| public IndexWriter(Directory d, Analyzer a, boolean create) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - init(d, a, create, false); |
| + init(d, a, create, false, null, true); |
| } |
| |
| /** |
| * Constructs an IndexWriter for the index in |
| - * <code>path</code>, creating it first if it does not |
| - * already exist, otherwise appending to the existing |
| - * index. Text will be analyzed with <code>a</code>. |
| + * <code>path</code>, first creating it if it does not |
| + * already exist. Text will be analyzed with |
| + * <code>a</code>. |
| * |
| * @param path the path to the index directory |
| * @param a the analyzer to use |
| @@ -309,18 +366,13 @@ |
| */ |
| public IndexWriter(String path, Analyzer a) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - if (IndexReader.indexExists(path)) { |
| - init(path, a, false); |
| - } else { |
| - init(path, a, true); |
| - } |
| + init(FSDirectory.getDirectory(path), a, true, null, true); |
| } |
| |
| /** |
| * Constructs an IndexWriter for the index in |
| - * <code>path</code>, creating it first if it does not |
| - * already exist, otherwise appending to the existing |
| - * index. Text will be analyzed with |
| + * <code>path</code>, first creating it if it does not |
| + * already exist. Text will be analyzed with |
| * <code>a</code>. |
| * |
| * @param path the path to the index directory |
| @@ -335,18 +387,14 @@ |
| */ |
| public IndexWriter(File path, Analyzer a) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - if (IndexReader.indexExists(path)) { |
| - init(path, a, false); |
| - } else { |
| - init(path, a, true); |
| - } |
| + init(FSDirectory.getDirectory(path), a, true, null, true); |
| } |
| |
| /** |
| * Constructs an IndexWriter for the index in |
| - * <code>d</code>, creating it first if it does not |
| - * already exist, otherwise appending to the existing |
| - * index. Text will be analyzed with <code>a</code>. |
| + * <code>d</code>, first creating it if it does not |
| + * already exist. Text will be analyzed with |
| + * <code>a</code>. |
| * |
| * @param d the index directory |
| * @param a the analyzer to use |
| @@ -360,28 +408,124 @@ |
| */ |
| public IndexWriter(Directory d, Analyzer a) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - if (IndexReader.indexExists(d)) { |
| - init(d, a, false, false); |
| - } else { |
| - init(d, a, true, false); |
| - } |
| + init(d, a, false, null, true); |
| } |
| |
| - private void init(String path, Analyzer a, final boolean create) |
| + /** |
| + * Constructs an IndexWriter for the index in |
| + * <code>d</code>, first creating it if it does not |
| + * already exist. Text will be analyzed with |
| + * <code>a</code>. |
| + * |
| + * @param d the index directory |
| + * @param autoCommit see <a href="#autoCommit">above</a> |
| + * @param a the analyzer to use |
| + * @throws CorruptIndexException if the index is corrupt |
| + * @throws LockObtainFailedException if another writer |
| + * has this index open (<code>write.lock</code> could not |
| + * be obtained) |
| + * @throws IOException if the directory cannot be |
| + * read/written to or if there is any other low-level |
| + * IO error |
| + */ |
| + public IndexWriter(Directory d, boolean autoCommit, Analyzer a) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - init(FSDirectory.getDirectory(path), a, create, true); |
| + init(d, a, false, null, autoCommit); |
| } |
| |
| - private void init(File path, Analyzer a, final boolean create) |
| + /** |
| + * Constructs an IndexWriter for the index in <code>d</code>. |
| + * Text will be analyzed with <code>a</code>. If <code>create</code> |
| + * is true, then a new, empty index will be created in |
| + * <code>d</code>, replacing the index already there, if any. |
| + * |
| + * @param d the index directory |
| + * @param autoCommit see <a href="#autoCommit">above</a> |
| + * @param a the analyzer to use |
| + * @param create <code>true</code> to create the index or overwrite |
| + * the existing one; <code>false</code> to append to the existing |
| + * index |
| + * @throws CorruptIndexException if the index is corrupt |
| + * @throws LockObtainFailedException if another writer |
| + * has this index open (<code>write.lock</code> could not |
| + * be obtained) |
| + * @throws IOException if the directory cannot be read/written to, or |
| + * if it does not exist and <code>create</code> is |
| + * <code>false</code> or if there is any other low-level |
| + * IO error |
| + */ |
| + public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create) |
| + throws CorruptIndexException, LockObtainFailedException, IOException { |
| + init(d, a, create, false, null, autoCommit); |
| + } |
| + |
| + /** |
| + * Expert: constructs an IndexWriter with a custom {@link |
| + * IndexDeletionPolicy}, for the index in <code>d</code>, |
| + * first creating it if it does not already exist. Text |
| + * will be analyzed with <code>a</code>. |
| + * |
| + * @param d the index directory |
| + * @param autoCommit see <a href="#autoCommit">above</a> |
| + * @param a the analyzer to use |
| + * @param deletionPolicy see <a href="#deletionPolicy">above</a> |
| + * @throws CorruptIndexException if the index is corrupt |
| + * @throws LockObtainFailedException if another writer |
| + * has this index open (<code>write.lock</code> could not |
| + * be obtained) |
| + * @throws IOException if the directory cannot be |
| + * read/written to or if there is any other low-level |
| + * IO error |
| + */ |
| + public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| - init(FSDirectory.getDirectory(path), a, create, true); |
| + init(d, a, false, deletionPolicy, autoCommit); |
| } |
| |
| - private void init(Directory d, Analyzer a, final boolean create, boolean closeDir) |
| + /** |
| + * Expert: constructs an IndexWriter with a custom {@link |
| + * IndexDeletionPolicy}, for the index in <code>d</code>. |
| + * Text will be analyzed with <code>a</code>. If |
| + * <code>create</code> is true, then a new, empty index |
| + * will be created in <code>d</code>, replacing the index |
| + * already there, if any. |
| + * |
| + * @param d the index directory |
| + * @param autoCommit see <a href="#autoCommit">above</a> |
| + * @param a the analyzer to use |
| + * @param create <code>true</code> to create the index or overwrite |
| + * the existing one; <code>false</code> to append to the existing |
| + * index |
| + * @param deletionPolicy see <a href="#deletionPolicy">above</a> |
| + * @throws CorruptIndexException if the index is corrupt |
| + * @throws LockObtainFailedException if another writer |
| + * has this index open (<code>write.lock</code> could not |
| + * be obtained) |
| + * @throws IOException if the directory cannot be read/written to, or |
| + * if it does not exist and <code>create</code> is |
| + * <code>false</code> or if there is any other low-level |
| + * IO error |
| + */ |
| + public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy) |
| + throws CorruptIndexException, LockObtainFailedException, IOException { |
| + init(d, a, create, false, deletionPolicy, autoCommit); |
| + } |
| + |
| + private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit) |
| throws CorruptIndexException, LockObtainFailedException, IOException { |
| + if (IndexReader.indexExists(d)) { |
| + init(d, a, false, closeDir, deletionPolicy, autoCommit); |
| + } else { |
| + init(d, a, true, closeDir, deletionPolicy, autoCommit); |
| + } |
| + } |
| + |
| + private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit) |
| + throws CorruptIndexException, LockObtainFailedException, IOException { |
| this.closeDir = closeDir; |
| directory = d; |
| analyzer = a; |
| + this.infoStream = defaultInfoStream; |
| |
| if (create) { |
| // Clear the write lock in case it's leftover: |
| @@ -410,13 +554,17 @@ |
| segmentInfos.read(directory); |
| } |
| |
| - // Create a deleter to keep track of which files can |
| - // be deleted: |
| - deleter = new IndexFileDeleter(segmentInfos, directory); |
| - deleter.setInfoStream(infoStream); |
| - deleter.findDeletableFiles(); |
| - deleter.deleteFiles(); |
| + this.autoCommit = autoCommit; |
| + if (!autoCommit) { |
| + rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); |
| + } |
| |
| + // Default deleter (for backwards compatibility) is |
| + // KeepOnlyLastCommitDeleter: |
| + deleter = new IndexFileDeleter(directory, |
| + deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, |
| + segmentInfos, infoStream); |
| + |
| } catch (IOException e) { |
| this.writeLock.release(); |
| this.writeLock = null; |
| @@ -533,11 +681,28 @@ |
| return mergeFactor; |
| } |
| |
| - /** If non-null, information about merges and a message when |
| - * maxFieldLength is reached will be printed to this. |
| + /** If non-null, this will be the default infoStream used |
| + * by a newly instantiated IndexWriter. |
| + * @see #setInfoStream |
| */ |
| + public static void setDefaultInfoStream(PrintStream infoStream) { |
| + IndexWriter.defaultInfoStream = infoStream; |
| + } |
| + |
| + /** |
| + * @see #setDefaultInfoStream |
| + */ |
| + public static PrintStream getDefaultInfoStream() { |
| + return IndexWriter.defaultInfoStream; |
| + } |
| + |
| + /** If non-null, information about merges, deletes and a |
| + * message when maxFieldLength is reached will be printed |
| + * to this. |
| + */ |
| public void setInfoStream(PrintStream infoStream) { |
| this.infoStream = infoStream; |
| + deleter.setInfoStream(infoStream); |
| } |
| |
| /** |
| @@ -613,6 +778,14 @@ |
| */ |
| public synchronized void close() throws CorruptIndexException, IOException { |
| flushRamSegments(); |
| + |
| + if (commitPending) { |
| + segmentInfos.write(directory); // now commit changes |
| + deleter.checkpoint(segmentInfos, true); |
| + commitPending = false; |
| + rollbackSegmentInfos = null; |
| + } |
| + |
| ramDirectory.close(); |
| if (writeLock != null) { |
| writeLock.release(); // release write lock |
| @@ -737,7 +910,9 @@ |
| dw.setInfoStream(infoStream); |
| String segmentName = newRamSegmentName(); |
| dw.addDocument(segmentName, doc); |
| - return new SegmentInfo(segmentName, 1, ramDirectory, false, false); |
| + SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false); |
| + si.setNumFields(dw.getNumFields()); |
| + return si; |
| } |
| |
| /** |
| @@ -871,6 +1046,7 @@ |
| |
| */ |
| private PrintStream infoStream = null; |
| + private static PrintStream defaultInfoStream = null; |
| |
| /** Merges all segments together into a single segment, |
| * optimizing an index for search. |
| @@ -949,21 +1125,18 @@ |
| * merges that happen (or ram segments flushed) will not |
| * write a new segments file and will not remove any files |
| * that were present at the start of the transaction. You |
| - * must make a matched (try/finall) call to |
| + * must make a matched (try/finally) call to |
| * commitTransaction() or rollbackTransaction() to finish |
| * the transaction. |
| */ |
| private void startTransaction() throws IOException { |
| - if (inTransaction) { |
| - throw new IOException("transaction is already in process"); |
| + localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); |
| + localAutoCommit = autoCommit; |
| + if (localAutoCommit) { |
| + flushRamSegments(); |
| + // Turn off auto-commit during our local transaction: |
| + autoCommit = false; |
| } |
| - rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); |
| - protectedSegments = new HashSet(); |
| - for(int i=0;i<segmentInfos.size();i++) { |
| - SegmentInfo si = (SegmentInfo) segmentInfos.elementAt(i); |
| - protectedSegments.add(si.name); |
| - } |
| - inTransaction = true; |
| } |
| |
| /* |
| @@ -972,20 +1145,21 @@ |
| */ |
| private void rollbackTransaction() throws IOException { |
| |
| + // First restore autoCommit in case we hit an exception below: |
| + autoCommit = localAutoCommit; |
| + |
| // Keep the same segmentInfos instance but replace all |
| // of its SegmentInfo instances. This is so the next |
| // attempt to commit using this instance of IndexWriter |
| // will always write to a new generation ("write once"). |
| segmentInfos.clear(); |
| - segmentInfos.addAll(rollbackSegmentInfos); |
| + segmentInfos.addAll(localRollbackSegmentInfos); |
| + localRollbackSegmentInfos = null; |
| |
| - // Ask deleter to locate unreferenced files & remove |
| - // them: |
| - deleter.clearPendingFiles(); |
| - deleter.findDeletableFiles(); |
| - deleter.deleteFiles(); |
| - |
| - clearTransaction(); |
| + // Ask deleter to locate unreferenced files we had |
| + // created & remove them: |
| + deleter.checkpoint(segmentInfos, false); |
| + deleter.refresh(); |
| } |
| |
| /* |
| @@ -994,35 +1168,79 @@ |
| * accumulated during the transaction |
| */ |
| private void commitTransaction() throws IOException { |
| - if (commitPending) { |
| - boolean success = false; |
| - try { |
| - // If we hit eg disk full during this write we have |
| - // to rollback.: |
| - segmentInfos.write(directory); // commit changes |
| - success = true; |
| - } finally { |
| - if (!success) { |
| - rollbackTransaction(); |
| - } |
| + |
| + // First restore autoCommit in case we hit an exception below: |
| + autoCommit = localAutoCommit; |
| + |
| + boolean success = false; |
| + try { |
| + checkpoint(); |
| + success = true; |
| + } finally { |
| + if (!success) { |
| + rollbackTransaction(); |
| } |
| - deleter.commitPendingFiles(); |
| - commitPending = false; |
| } |
| + localRollbackSegmentInfos = null; |
| |
| - clearTransaction(); |
| + // Give deleter a chance to remove files now: |
| + deleter.checkpoint(segmentInfos, autoCommit); |
| } |
| |
| - /* Should only be called by rollbackTransaction & |
| - * commitTransaction */ |
| - private void clearTransaction() { |
| - protectedSegments = null; |
| - rollbackSegmentInfos = null; |
| - inTransaction = false; |
| - } |
| + /** |
| + * Close the <code>IndexWriter</code> without committing |
| + * any of the changes that have occurred since it was |
| + * opened. This removes any temporary files that had been |
| + * created, after which the state of the index will be the |
| + * same as it was when this writer was first opened. This |
| + * can only be called when this IndexWriter was opened |
| + * with <code>autoCommit=false</code>. |
| + * @throws IllegalStateException if this is called when |
| + * the writer was opened with <code>autoCommit=true</code>. |
| + * @throws IOException if there is a low-level IO error |
| + */ |
| + public void abort() throws IOException { |
| + if (!autoCommit) { |
| |
| + // Keep the same segmentInfos instance but replace all |
| + // of its SegmentInfo instances. This is so the next |
| + // attempt to commit using this instance of IndexWriter |
| + // will always write to a new generation ("write once"). |
| + segmentInfos.clear(); |
| + segmentInfos.addAll(rollbackSegmentInfos); |
| |
| + // Ask deleter to locate unreferenced files & remove |
| + // them: |
| + deleter.checkpoint(segmentInfos, false); |
| + deleter.refresh(); |
| |
| + ramSegmentInfos = new SegmentInfos(); |
| + bufferedDeleteTerms.clear(); |
| + numBufferedDeleteTerms = 0; |
| + |
| + commitPending = false; |
| + close(); |
| + |
| + } else { |
| + throw new IllegalStateException("abort() can only be called when IndexWriter was opened with autoCommit=false"); |
| + } |
| + } |
| + |
| + /* |
| + * Called whenever the SegmentInfos has been updated and |
| + * the index files referenced exist (correctly) in the |
| + * index directory. If we are in autoCommit mode, we |
| + * commit the change immediately. Else, we mark |
| + * commitPending. |
| + */ |
| + private void checkpoint() throws IOException { |
| + if (autoCommit) { |
| + segmentInfos.write(directory); |
| + } else { |
| + commitPending = true; |
| + } |
| + } |
| + |
| /** Merges all segments from an array of indexes into this index. |
| * |
| * <p>This may be used to parallelize batch indexing. A large document |
| @@ -1266,16 +1484,13 @@ |
| final String mergedName = newSegmentName(); |
| SegmentMerger merger = new SegmentMerger(this, mergedName); |
| |
| - final Vector segmentsToDelete = new Vector(); |
| SegmentInfo info; |
| - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); |
| |
| IndexReader sReader = null; |
| try { |
| if (segmentInfos.size() == 1){ // add existing index, if any |
| sReader = SegmentReader.get(segmentInfos.info(0)); |
| merger.add(sReader); |
| - segmentsToDelete.addElement(sReader); // queue segment for deletion |
| } |
| |
| for (int i = 0; i < readers.length; i++) // add new indexes |
| @@ -1288,16 +1503,15 @@ |
| try { |
| int docCount = merger.merge(); // merge 'em |
| |
| - segmentInfos.setSize(0); // pop old infos & add new |
| - info = new SegmentInfo(mergedName, docCount, directory, false, true); |
| - segmentInfos.addElement(info); |
| - commitPending = true; |
| - |
| if(sReader != null) { |
| sReader.close(); |
| sReader = null; |
| } |
| |
| + segmentInfos.setSize(0); // pop old infos & add new |
| + info = new SegmentInfo(mergedName, docCount, directory, false, true); |
| + segmentInfos.addElement(info); |
| + |
| success = true; |
| |
| } finally { |
| @@ -1312,26 +1526,16 @@ |
| sReader.close(); |
| } |
| } |
| + |
| + if (useCompoundFile) { |
| |
| - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file |
| - deleter.deleteSegments(segmentsToDelete); // delete now-unused segments |
| - |
| - if (useCompoundFile) { |
| boolean success = false; |
| |
| - segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); |
| - Vector filesToDelete; |
| - |
| startTransaction(); |
| |
| try { |
| - |
| - filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); |
| - |
| + merger.createCompoundFile(mergedName + ".cfs"); |
| info.setUseCompoundFile(true); |
| - commitPending = true; |
| success = true; |
| - |
| } finally { |
| if (!success) { |
| rollbackTransaction(); |
| @@ -1339,9 +1543,6 @@ |
| commitTransaction(); |
| } |
| } |
| - |
| - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file |
| - deleter.deleteFiles(filesToDelete); // delete now unused files of segment |
| } |
| } |
| |
| @@ -1500,14 +1701,12 @@ |
| final String mergedName = newSegmentName(); |
| SegmentMerger merger = null; |
| |
| - final Vector segmentsToDelete = new Vector(); |
| + final List ramSegmentsToDelete = new ArrayList(); |
| |
| - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); |
| - String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); |
| - |
| SegmentInfo newSegment = null; |
| |
| int mergedDocCount = 0; |
| + boolean anyDeletes = (bufferedDeleteTerms.size() != 0); |
| |
| // This is try/finally to make sure merger's readers are closed: |
| try { |
| @@ -1522,9 +1721,9 @@ |
| infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); |
| IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet) |
| merger.add(reader); |
| - if ((reader.directory() == this.directory) || // if we own the directory |
| - (reader.directory() == this.ramDirectory)) |
| - segmentsToDelete.addElement(reader); // queue segment for deletion |
| + if (reader.directory() == this.ramDirectory) { |
| + ramSegmentsToDelete.add(si); |
| + } |
| } |
| } |
| |
| @@ -1545,9 +1744,8 @@ |
| newSegment = new SegmentInfo(mergedName, mergedDocCount, |
| directory, false, true); |
| } |
| - |
| - if (!inTransaction |
| - && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) { |
| + |
| + if (sourceSegments != ramSegmentInfos || anyDeletes) { |
| // Now save the SegmentInfo instances that |
| // we are replacing: |
| rollback = (SegmentInfos) segmentInfos.clone(); |
| @@ -1565,19 +1763,12 @@ |
| } |
| |
| if (sourceSegments == ramSegmentInfos) { |
| - // Should not be necessary: no prior commit should |
| - // have left pending files, so just defensive: |
| - deleter.clearPendingFiles(); |
| maybeApplyDeletes(doMerge); |
| doAfterFlush(); |
| } |
| + |
| + checkpoint(); |
| |
| - if (!inTransaction) { |
| - segmentInfos.write(directory); // commit before deleting |
| - } else { |
| - commitPending = true; |
| - } |
| - |
| success = true; |
| |
| } finally { |
| @@ -1589,11 +1780,10 @@ |
| if (sourceSegments == ramSegmentInfos) { |
| ramSegmentInfos.removeAllElements(); |
| } |
| - } else if (!inTransaction) { |
| + } else { |
| |
| // Must rollback so our state matches index: |
| - |
| - if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) { |
| + if (sourceSegments == ramSegmentInfos && !anyDeletes) { |
| // Simple case: newSegment may or may not have |
| // been added to the end of our segment infos, |
| // so just check & remove if so: |
| @@ -1611,14 +1801,8 @@ |
| segmentInfos.addAll(rollback); |
| } |
| |
| - // Erase any pending files that we were going to delete: |
| - // i.e. old del files added by SegmentReader.doCommit() |
| - deleter.clearPendingFiles(); |
| - |
| - // Delete any partially created files: |
| - deleter.deleteFile(nextSegmentsFileName); |
| - deleter.findDeletableFiles(); |
| - deleter.deleteFiles(); |
| + // Delete any partially created and now unreferenced files: |
| + deleter.refresh(); |
| } |
| } |
| } finally { |
| @@ -1626,53 +1810,33 @@ |
| if (doMerge) merger.closeReaders(); |
| } |
| |
| - if (!inTransaction) { |
| - // Attempt to delete all files we just obsoleted: |
| - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file |
| - deleter.deleteSegments(segmentsToDelete); // delete now-unused segments |
| - // Includes the old del files |
| - deleter.commitPendingFiles(); |
| - } else { |
| - deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file |
| - deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments |
| - } |
| + // Delete the RAM segments |
| + deleter.deleteDirect(ramDirectory, ramSegmentsToDelete); |
| |
| + // Give deleter a chance to remove files now. |
| + deleter.checkpoint(segmentInfos, autoCommit); |
| + |
| if (useCompoundFile && doMerge) { |
| |
| - segmentsInfosFileName = nextSegmentsFileName; |
| - nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); |
| - |
| - Vector filesToDelete; |
| - |
| boolean success = false; |
| |
| try { |
| |
| - filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); |
| + merger.createCompoundFile(mergedName + ".cfs"); |
| newSegment.setUseCompoundFile(true); |
| - if (!inTransaction) { |
| - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file |
| - } |
| + checkpoint(); |
| success = true; |
| |
| } finally { |
| - if (!success && !inTransaction) { |
| + if (!success) { |
| // Must rollback: |
| newSegment.setUseCompoundFile(false); |
| - deleter.deleteFile(mergedName + ".cfs"); |
| - deleter.deleteFile(nextSegmentsFileName); |
| + deleter.refresh(); |
| } |
| } |
| - |
| - if (!inTransaction) { |
| - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file |
| - } |
| - |
| - // We can delete these segments whether or not we are |
| - // in a transaction because we had just written them |
| - // above so they can't need protection by the |
| - // transaction: |
| - deleter.deleteFiles(filesToDelete); // delete now-unused segments |
| + |
| + // Give deleter a chance to remove files now. |
| + deleter.checkpoint(segmentInfos, autoCommit); |
| } |
| |
| return mergedDocCount; |
| @@ -1692,7 +1856,6 @@ |
| IndexReader reader = null; |
| try { |
| reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1)); |
| - reader.setDeleter(deleter); |
| |
| // Apply delete terms to the segment just flushed from ram |
| // apply appropriately so that a delete term is only applied to |
| @@ -1718,7 +1881,6 @@ |
| IndexReader reader = null; |
| try { |
| reader = SegmentReader.get(segmentInfos.info(i)); |
| - reader.setDeleter(deleter); |
| |
| // Apply delete terms to disk segments |
| // except the one just flushed from ram. |
| @@ -1769,7 +1931,7 @@ |
| } |
| |
| // Number of ram segments a delete term applies to. |
| - private class Num { |
| + private static class Num { |
| private int num; |
| |
| Num(int num) { |
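| |
| Taken together, checkpoint() and abort() above give the writer two modes: with |
| autoCommit=true every consistent change writes a new segments_N, while with |
| autoCommit=false nothing is visible to readers until close(). A minimal sketch |
| of driving the autoCommit=false mode (the class wrapper and Document/Field |
| setup are illustrative, not part of this patch): |
| |
| import java.io.IOException; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| public class NoAutoCommitSketch { |
|   public static void main(String[] args) throws IOException { |
|     Directory dir = new RAMDirectory(); |
|     // autoCommit=false: checkpoint() only sets commitPending, so no new |
|     // segments_N file is written until the writer is closed: |
|     IndexWriter writer = new IndexWriter(dir, false, |
|                                          new WhitespaceAnalyzer(), true); |
|     boolean success = false; |
|     try { |
|       Document doc = new Document(); |
|       doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED)); |
|       writer.addDocument(doc); |
|       success = true; |
|     } finally { |
|       if (success) { |
|         writer.close();   // the single commit for this session |
|       } else { |
|         writer.abort();   // discard all changes since the writer was opened |
|       } |
|     } |
|   } |
| } |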
| Index: src/java/org/apache/lucene/index/DocumentWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/DocumentWriter.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy) |
| @@ -388,6 +388,9 @@ |
| this.infoStream = infoStream; |
| } |
| |
| + int getNumFields() { |
| + return fieldInfos.size(); |
| + } |
| } |
| |
| final class Posting { // info about a Term in a doc |
| Index: src/java/org/apache/lucene/index/IndexFileDeleter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) |
| @@ -18,284 +18,484 @@ |
| */ |
| |
| import org.apache.lucene.index.IndexFileNames; |
| -import org.apache.lucene.index.IndexFileNameFilter; |
| import org.apache.lucene.index.SegmentInfos; |
| +import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.store.Directory; |
| |
| import java.io.IOException; |
| import java.io.PrintStream; |
| -import java.util.Vector; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| +import java.util.List; |
| +import java.util.ArrayList; |
| +import java.util.Collections; |
| |
| -/** |
| - * A utility class (used by both IndexReader and |
| - * IndexWriter) to keep track of files that need to be |
| - * deleted because they are no longer referenced by the |
| - * index. |
| +/* |
| + * This class keeps track of each SegmentInfos instance that |
| + * is still "live", either because it corresponds to a |
| + * segments_N in the Directory (a real commit) or because |
| + * it's the in-memory SegmentInfos that a writer is actively |
| + * updating but has not yet committed (currently this only |
| + * applies when autoCommit=false in IndexWriter). This |
| + * class uses simple reference counting to map the live |
| + * SegmentInfos instances to individual files in the |
| + * Directory. |
| + * |
| + * A separate deletion policy interface |
| + * (IndexDeletionPolicy) is consulted on creation (onInit) |
| + * and once per commit (onCommit), to decide when a commit |
| + * should be removed. |
| + * |
| + * The current default deletion policy is {@link |
| + * KeepOnlyLastCommitDeletionPolicy}, which removes all |
| + * prior commits when a new commit has completed. This |
| + * matches the behavior before 2.2. |
| + * |
| + * Note that you must hold the write.lock before |
| + * instantiating this class. It opens segments_N file(s) |
| + * directly with no retry logic. |
| */ |
| + |
| final class IndexFileDeleter { |
| - private Vector deletable; |
| - private HashSet pending; |
| + |
| + /* Files that we tried to delete but failed (likely |
| + * because they are open and we are running on Windows), |
| + * so we will retry them later: */ |
| + private List deletable; |
| + |
| + /* Reference count for all files in the index. Maps |
| + * String to RefCount (class below) instances: */ |
| + private HashMap refCounts = new HashMap(); |
| + |
| + /* Holds all commits (segments_N) currently in the index. |
| + * This will have just 1 commit if you are using the |
| + * default delete policy (KeepOnlyLastCommitDeletionPolicy). |
| + * Other policies may leave commit points live for longer |
| + * in which case this list would be longer than 1: */ |
| + private List commits = new ArrayList(); |
| + |
| + /* Holds files we had incref'd from the previous |
| + * non-commit checkpoint: */ |
| + private List lastFiles = new ArrayList(); |
| + |
| + private PrintStream infoStream; |
| + private List toDelete = new ArrayList(); |
| private Directory directory; |
| - private SegmentInfos segmentInfos; |
| - private PrintStream infoStream; |
| + private IndexDeletionPolicy policy; |
| |
| - IndexFileDeleter(SegmentInfos segmentInfos, Directory directory) |
| - throws IOException { |
| - this.segmentInfos = segmentInfos; |
| - this.directory = directory; |
| - } |
| - void setSegmentInfos(SegmentInfos segmentInfos) { |
| - this.segmentInfos = segmentInfos; |
| - } |
| - SegmentInfos getSegmentInfos() { |
| - return segmentInfos; |
| - } |
| - |
| void setInfoStream(PrintStream infoStream) { |
| this.infoStream = infoStream; |
| } |
| + |
| + private void message(String message) { |
| + infoStream.println(this + " " + Thread.currentThread().getName() + ": " + message); |
| + } |
| |
| - /** Determine index files that are no longer referenced |
| - * and therefore should be deleted. This is called once |
| - * (by the writer), and then subsequently we add onto |
| - * deletable any files that are no longer needed at the |
| - * point that we create the unused file (eg when merging |
| - * segments), and we only remove from deletable when a |
| - * file is successfully deleted. |
| + /** |
| + * Initialize the deleter: find all previous commits in |
| + * the Directory, incref the files they reference, call |
| + * the policy to let it delete commits. The incoming |
| + * segmentInfos must have been loaded from a commit point |
| + * and not yet modified. This will remove any files not |
| + * referenced by any of the commits. |
| + * @throws CorruptIndexException if the index is corrupt |
| + * @throws IOException if there is a low-level IO error |
| */ |
| + public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream) |
| + throws CorruptIndexException, IOException { |
| |
| - void findDeletableFiles() throws IOException { |
| + this.infoStream = infoStream; |
| + this.policy = policy; |
| + this.directory = directory; |
| |
| - // Gather all "current" segments: |
| - HashMap current = new HashMap(); |
| - for(int j=0;j<segmentInfos.size();j++) { |
| - SegmentInfo segmentInfo = (SegmentInfo) segmentInfos.elementAt(j); |
| - current.put(segmentInfo.name, segmentInfo); |
| - } |
| - |
| - // Then go through all files in the Directory that are |
| - // Lucene index files, and add to deletable if they are |
| - // not referenced by the current segments info: |
| - |
| - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); |
| + // First pass: walk the files and initialize our ref |
| + // counts: |
| + long currentGen = segmentInfos.getGeneration(); |
| IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); |
| |
| String[] files = directory.list(); |
| |
| - for (int i = 0; i < files.length; i++) { |
| + CommitPoint currentCommitPoint = null; |
| |
| - if (filter.accept(null, files[i]) && !files[i].equals(segmentsInfosFileName) && !files[i].equals(IndexFileNames.SEGMENTS_GEN)) { |
| + for(int i=0;i<files.length;i++) { |
| |
| - String segmentName; |
| - String extension; |
| + String fileName = files[i]; |
| |
| - // First remove any extension: |
| - int loc = files[i].indexOf('.'); |
| - if (loc != -1) { |
| - extension = files[i].substring(1+loc); |
| - segmentName = files[i].substring(0, loc); |
| - } else { |
| - extension = null; |
| - segmentName = files[i]; |
| - } |
| + if (filter.accept(null, fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { |
| |
| - // Then, remove any generation count: |
| - loc = segmentName.indexOf('_', 1); |
| - if (loc != -1) { |
| - segmentName = segmentName.substring(0, loc); |
| - } |
| + // Add this file to refCounts with initial count 0: |
| + getRefCount(fileName); |
| |
| - // Delete this file if it's not a "current" segment, |
| - // or, it is a single index file but there is now a |
| - // corresponding compound file: |
| - boolean doDelete = false; |
| + if (fileName.startsWith(IndexFileNames.SEGMENTS)) { |
| |
| - if (!current.containsKey(segmentName)) { |
| - // Delete if segment is not referenced: |
| - doDelete = true; |
| - } else { |
| - // OK, segment is referenced, but file may still |
| - // be orphan'd: |
| - SegmentInfo info = (SegmentInfo) current.get(segmentName); |
| - |
| - if (filter.isCFSFile(files[i]) && info.getUseCompoundFile()) { |
| - // This file is in fact stored in a CFS file for |
| - // this segment: |
| - doDelete = true; |
| - } else { |
| - |
| - if ("del".equals(extension)) { |
| - // This is a _segmentName_N.del file: |
| - if (!files[i].equals(info.getDelFileName())) { |
| - // If this is a seperate .del file, but it |
| - // doesn't match the current del filename for |
| - // this segment, then delete it: |
| - doDelete = true; |
| - } |
| - } else if (extension != null && extension.startsWith("s") && extension.matches("s\\d+")) { |
| - int field = Integer.parseInt(extension.substring(1)); |
| - // This is a _segmentName_N.sX file: |
| - if (!files[i].equals(info.getNormFileName(field))) { |
| - // This is an orphan'd separate norms file: |
| - doDelete = true; |
| - } |
| - } else if ("cfs".equals(extension) && !info.getUseCompoundFile()) { |
| - // This is a partially written |
| - // _segmentName.cfs: |
| - doDelete = true; |
| + // This is a commit (segments or segments_N), and |
| + // it's valid (<= the max gen). Load it, then |
| + // incref all files it refers to: |
| + if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) { |
| + if (infoStream != null) { |
| + message("init: load commit \"" + fileName + "\""); |
| } |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(directory, fileName); |
| + CommitPoint commitPoint = new CommitPoint(sis); |
| + if (sis.getGeneration() == segmentInfos.getGeneration()) { |
| + currentCommitPoint = commitPoint; |
| + } |
| + commits.add(commitPoint); |
| + incRef(sis, true); |
| } |
| } |
| + } |
| + } |
| |
| - if (doDelete) { |
| - addDeletableFile(files[i]); |
| - if (infoStream != null) { |
| - infoStream.println("IndexFileDeleter: file \"" + files[i] + "\" is unreferenced in index and will be deleted on next commit"); |
| - } |
| + if (currentCommitPoint == null) { |
| + throw new CorruptIndexException("failed to locate current segments_N file"); |
| + } |
| + |
| + // We keep commits list in sorted order (oldest to newest): |
| + Collections.sort(commits); |
| + |
| + // Now delete anything with ref count at 0. These are |
| + // presumably abandoned files, e.g. due to a crash of |
| + // the IndexWriter: |
| + Iterator it = refCounts.keySet().iterator(); |
| + while(it.hasNext()) { |
| + String fileName = (String) it.next(); |
| + RefCount rc = (RefCount) refCounts.get(fileName); |
| + if (0 == rc.count) { |
| + if (infoStream != null) { |
| + message("init: removing unreferenced file \"" + fileName + "\""); |
| } |
| + deleteFile(fileName); |
| } |
| } |
| + |
| + // Finally, give policy a chance to remove things on |
| + // startup: |
| + policy.onInit(commits); |
| + |
| + // It's OK for the onInit to remove the current commit |
| + // point; we just have to checkpoint our in-memory |
| + // SegmentInfos to protect those files that it uses: |
| + if (currentCommitPoint.deleted) { |
| + checkpoint(segmentInfos, false); |
| + } |
| + |
| + deleteCommits(); |
| } |
| |
| - /* |
| - * Some operating systems (e.g. Windows) don't permit a file to be deleted |
| - * while it is opened for read (e.g. by another process or thread). So we |
| - * assume that when a delete fails it is because the file is open in another |
| - * process, and queue the file for subsequent deletion. |
| + /** |
| + * Remove the CommitPoints in the toDelete List by |
| + * decRef'ing all files that each one references. |
| */ |
| + private void deleteCommits() throws IOException { |
| |
| - void deleteSegments(Vector segments) throws IOException { |
| + int size = toDelete.size(); |
| |
| - deleteFiles(); // try to delete files that we couldn't before |
| + if (size > 0) { |
| |
| - for (int i = 0; i < segments.size(); i++) { |
| - SegmentReader reader = (SegmentReader)segments.elementAt(i); |
| - if (reader.directory() == this.directory) |
| - deleteFiles(reader.files()); // try to delete our files |
| - else |
| - deleteFiles(reader.files(), reader.directory()); // delete other files |
| + // First decref all files that had been referred to by |
| + // the now-deleted commits: |
| + for(int i=0;i<size;i++) { |
| + CommitPoint commit = (CommitPoint) toDelete.get(i); |
| + if (infoStream != null) { |
| + message("deleteCommits: now remove commit \"" + commit.getSegmentsFileName() + "\""); |
| + } |
| + int size2 = commit.files.size(); |
| + for(int j=0;j<size2;j++) { |
| + decRef((List) commit.files.get(j)); |
| + } |
| + decRef(commit.getSegmentsFileName()); |
| + } |
| + toDelete.clear(); |
| + |
| + // Now compact commits to remove deleted ones: |
| + size = commits.size(); |
| + int readFrom = 0; |
| + int writeTo = 0; |
| + while(readFrom < size) { |
| + CommitPoint commit = (CommitPoint) commits.get(readFrom); |
| + if (!commit.deleted) { |
| + if (writeTo != readFrom) { |
| + commits.set(writeTo, commits.get(readFrom)); |
| + } |
| + writeTo++; |
| + } |
| + readFrom++; |
| + } |
| + |
| + while(size > writeTo) { |
| + commits.remove(size-1); |
| + size--; |
| + } |
| } |
| } |
| |
| /** |
| - * Delete these segments, as long as they are not listed |
| - * in protectedSegments. If they are, then, instead, add |
| - * them to the pending set. |
| - */ |
| - |
| - void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException { |
| + * Writer calls this when it has hit an error and had to |
| + * roll back, to tell us that there may now be |
| + * unreferenced files in the filesystem. So we re-list |
| + * the filesystem and delete such files: |
| + */ |
| + public void refresh() throws IOException { |
| + String[] files = directory.list(); |
| + IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); |
| + for(int i=0;i<files.length;i++) { |
| + String fileName = files[i]; |
| + if (filter.accept(null, fileName) && !refCounts.containsKey(fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { |
| + // Unreferenced file, so remove it |
| + if (infoStream != null) { |
| + message("refresh: removing newly created unreferenced file \"" + fileName + "\""); |
| + } |
| + deleteFile(fileName); |
| + } |
| + } |
| + } |
| |
| - deleteFiles(); // try to delete files that we couldn't before |
| + /** |
| + * Writer calls this when it has made a "consistent |
| + * change" to the index, meaning new files are written to |
| + * the index and the in-memory SegmentInfos have been |
| + * modified to point to those files. |
| + * |
| + * This may or may not be a commit (segments_N may or may |
| + * not have been written). |
| + * |
| + * We simply incref the files referenced by the new |
| + * SegmentInfos and decref the files we had previously |
| + * seen (if any). |
| + * |
| + * If this is a commit, we also call the policy to give it |
| + * a chance to remove other commits. If any commits are |
| + * removed, we decref their files as well. |
| + */ |
| + public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException { |
| |
| - for (int i = 0; i < segments.size(); i++) { |
| - SegmentReader reader = (SegmentReader)segments.elementAt(i); |
| - if (reader.directory() == this.directory) { |
| - if (protectedSegments.contains(reader.getSegmentName())) { |
| - addPendingFiles(reader.files()); // record these for deletion on commit |
| - } else { |
| - deleteFiles(reader.files()); // try to delete our files |
| + if (infoStream != null) { |
| + message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [isCommit = " + isCommit + "]"); |
| + } |
| + |
| + // Try again now to delete any previously un-deletable |
| + // files (because they were in use, on Windows): |
| + if (deletable != null) { |
| + List oldDeletable = deletable; |
| + deletable = null; |
| + int size = oldDeletable.size(); |
| + for(int i=0;i<size;i++) { |
| + deleteFile((String) oldDeletable.get(i)); |
| + } |
| + } |
| + |
| + // Incref the files: |
| + incRef(segmentInfos, isCommit); |
| + |
| + if (isCommit) { |
| + // Append to our commits list: |
| + commits.add(new CommitPoint(segmentInfos)); |
| + |
| + // Tell policy so it can remove commits: |
| + policy.onCommit(commits); |
| + |
| + // Decref files for commits that were deleted by the policy: |
| + deleteCommits(); |
| + } |
| + |
| + // DecRef old files from the last checkpoint, if any: |
| + int size = lastFiles.size(); |
| + if (size > 0) { |
| + for(int i=0;i<size;i++) { |
| + decRef((List) lastFiles.get(i)); |
| + } |
| + lastFiles.clear(); |
| + } |
| + |
| + if (!isCommit) { |
| + // Save files so we can decRef on next checkpoint/commit: |
| + size = segmentInfos.size(); |
| + for(int i=0;i<size;i++) { |
| + SegmentInfo segmentInfo = segmentInfos.info(i); |
| + if (segmentInfo.dir == directory) { |
| + lastFiles.add(segmentInfo.files()); |
| } |
| - } else { |
| - deleteFiles(reader.files(), reader.directory()); // delete other files |
| } |
| } |
| } |
| - |
| - void deleteFiles(Vector files, Directory directory) |
| - throws IOException { |
| - for (int i = 0; i < files.size(); i++) |
| - directory.deleteFile((String)files.elementAt(i)); |
| + |
| + private void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException { |
| + int size = segmentInfos.size(); |
| + for(int i=0;i<size;i++) { |
| + SegmentInfo segmentInfo = segmentInfos.info(i); |
| + if (segmentInfo.dir == directory) { |
| + incRef(segmentInfo.files()); |
| + } |
| + } |
| + |
| + if (isCommit) { |
| + // Since this is a commit point, also incref its |
| + // segments_N file: |
| + getRefCount(segmentInfos.getCurrentSegmentFileName()).IncRef(); |
| + } |
| } |
| |
| - void deleteFiles(Vector files) |
| - throws IOException { |
| - deleteFiles(); // try to delete files that we couldn't before |
| - for (int i = 0; i < files.size(); i++) { |
| - deleteFile((String) files.elementAt(i)); |
| + private void incRef(List files) throws IOException { |
| + int size = files.size(); |
| + for(int i=0;i<size;i++) { |
| + String fileName = (String) files.get(i); |
| + RefCount rc = getRefCount(fileName); |
| + if (infoStream != null) { |
| + message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count); |
| + } |
| + rc.IncRef(); |
| } |
| } |
| |
| - void deleteFile(String file) |
| + private void decRef(List files) throws IOException { |
| + int size = files.size(); |
| + for(int i=0;i<size;i++) { |
| + decRef((String) files.get(i)); |
| + } |
| + } |
| + |
| + private void decRef(String fileName) throws IOException { |
| + RefCount rc = getRefCount(fileName); |
| + if (infoStream != null) { |
| + message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count); |
| + } |
| + if (0 == rc.DecRef()) { |
| + // This file is no longer referenced by any past |
| + // commit points nor by the in-memory SegmentInfos: |
| + deleteFile(fileName); |
| + refCounts.remove(fileName); |
| + } |
| + } |
| + |
| + private RefCount getRefCount(String fileName) { |
| + RefCount rc; |
| + if (!refCounts.containsKey(fileName)) { |
| + rc = new RefCount(); |
| + refCounts.put(fileName, rc); |
| + } else { |
| + rc = (RefCount) refCounts.get(fileName); |
| + } |
| + return rc; |
| + } |
| + |
| + private void deleteFile(String fileName) |
| throws IOException { |
| try { |
| - directory.deleteFile(file); // try to delete each file |
| + if (infoStream != null) { |
| + message("delete \"" + fileName + "\""); |
| + } |
| + directory.deleteFile(fileName); |
| } catch (IOException e) { // if delete fails |
| - if (directory.fileExists(file)) { |
| - if (infoStream != null) |
| - infoStream.println("IndexFileDeleter: unable to remove file \"" + file + "\": " + e.toString() + "; Will re-try later."); |
| - addDeletableFile(file); // add to deletable |
| + if (directory.fileExists(fileName)) { |
| + |
| + // Some operating systems (e.g. Windows) don't |
| + // permit a file to be deleted while it is opened |
| + // for read (e.g. by another process or thread). So |
| + // we assume that when a delete fails it is because |
| + // the file is open in another process, and queue |
| + // the file for subsequent deletion. |
| + |
| + if (infoStream != null) { |
| + message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later."); |
| + } |
| + if (deletable == null) { |
| + deletable = new ArrayList(); |
| + } |
| + deletable.add(fileName); // add to deletable |
| } |
| } |
| } |
| |
| - void clearPendingFiles() { |
| - pending = null; |
| - } |
| - |
| - /* |
| - Record that the files for these segments should be |
| - deleted, once the pending deletes are committed. |
| + /** |
| + * Blindly delete the files used by the specified segments, |
| + * with no reference counting and no retry. This is only |
| + * currently used by writer to delete its RAM segments |
| + * from a RAMDirectory. |
| */ |
| - void addPendingSegments(Vector segments) throws IOException { |
| - for (int i = 0; i < segments.size(); i++) { |
| - SegmentReader reader = (SegmentReader)segments.elementAt(i); |
| - if (reader.directory() == this.directory) { |
| - addPendingFiles(reader.files()); |
| + public void deleteDirect(Directory otherDir, List segments) throws IOException { |
| + int size = segments.size(); |
| + for(int i=0;i<size;i++) { |
| + List toDelete = ((SegmentInfo) segments.get(i)).files(); |
| + int size2 = toDelete.size(); |
| + for(int j=0;j<size2;j++) { |
| + otherDir.deleteFile((String) toDelete.get(j)); |
| } |
| } |
| } |
| |
| - /* |
| - Record list of files for deletion, but do not delete |
| - them until commitPendingFiles is called. |
| - */ |
| - void addPendingFiles(Vector files) { |
| - for(int i=0;i<files.size();i++) { |
| - addPendingFile((String) files.elementAt(i)); |
| + /** |
| + * Tracks the reference count for a single index file: |
| + */ |
| + final private static class RefCount { |
| + |
| + int count; |
| + |
| + final private int IncRef() { |
| + return ++count; |
| } |
| - } |
| |
| - /* |
| - Record a file for deletion, but do not delete it until |
| - commitPendingFiles is called. |
| - */ |
| - void addPendingFile(String fileName) { |
| - if (pending == null) { |
| - pending = new HashSet(); |
| + final private int DecRef() { |
| + return --count; |
| } |
| - pending.add(fileName); |
| } |
| |
| - void commitPendingFiles() throws IOException { |
| - if (pending != null) { |
| - if (deletable == null) { |
| - deletable = new Vector(); |
| + /** |
| + * Holds details for each commit point. This class is |
| + * also passed to the deletion policy. Note: this class |
| + * has a natural ordering that is inconsistent with |
| + * equals. |
| + */ |
| + |
| + final private class CommitPoint implements Comparable, IndexCommitPoint { |
| + |
| + long gen; |
| + List files; |
| + String segmentsFileName; |
| + boolean deleted; |
| + |
| + public CommitPoint(SegmentInfos segmentInfos) throws IOException { |
| + segmentsFileName = segmentInfos.getCurrentSegmentFileName(); |
| + int size = segmentInfos.size(); |
| + files = new ArrayList(size); |
| + gen = segmentInfos.getGeneration(); |
| + for(int i=0;i<size;i++) { |
| + SegmentInfo segmentInfo = segmentInfos.info(i); |
| + if (segmentInfo.dir == directory) { |
| + files.add(segmentInfo.files()); |
| + } |
| } |
| - Iterator it = pending.iterator(); |
| - while(it.hasNext()) { |
| - deletable.addElement(it.next()); |
| - } |
| - pending = null; |
| - deleteFiles(); |
| } |
| - } |
| |
| - void addDeletableFile(String fileName) { |
| - if (deletable == null) { |
| - deletable = new Vector(); |
| + /** |
| + * Get the segments_N file for this commit point. |
| + */ |
| + public String getSegmentsFileName() { |
| + return segmentsFileName; |
| } |
| - deletable.addElement(fileName); |
| - } |
| |
| - void deleteFiles() |
| - throws IOException { |
| - if (deletable != null) { |
| - Vector oldDeletable = deletable; |
| - deletable = null; |
| - deleteFiles(oldDeletable); // try to delete deletable |
| + /** |
| + * Called only by the deletion policy, to remove this |
| + * commit point from the index. |
| + */ |
| + public void delete() { |
| + if (!deleted) { |
| + deleted = true; |
| + toDelete.add(this); |
| + } |
| } |
| + |
| + public int compareTo(Object obj) { |
| + CommitPoint commit = (CommitPoint) obj; |
| + if (gen < commit.gen) { |
| + return -1; |
| + } else if (gen > commit.gen) { |
| + return 1; |
| + } else { |
| + return 0; |
| + } |
| + } |
| } |
| } |
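| |
| Because the commits List handed to a policy is sorted oldest to newest and a |
| policy removes a commit simply by calling delete() on it, alternatives to the |
| default policy stay small. A sketch of one that keeps the newest N commits |
| (the class name and keep-count are invented for illustration; it relies only |
| on the onInit/onCommit contract used above): |
| |
| import java.util.List; |
| import org.apache.lucene.index.IndexCommitPoint; |
| import org.apache.lucene.index.IndexDeletionPolicy; |
| |
| public class KeepLastNCommitsDeletionPolicy implements IndexDeletionPolicy { |
|   private final int keep; |
| |
|   public KeepLastNCommitsDeletionPolicy(int keep) { |
|     this.keep = keep; |
|   } |
| |
|   public void onInit(List commits) { |
|     onCommit(commits); |
|   } |
| |
|   // Commits arrive sorted oldest to newest: delete all but the last keep. |
|   public void onCommit(List commits) { |
|     int size = commits.size(); |
|     for (int i = 0; i < size - keep; i++) { |
|       ((IndexCommitPoint) commits.get(i)).delete(); |
|     } |
|   } |
| } |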
| Index: src/java/org/apache/lucene/index/SegmentInfo.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) |
| @@ -21,6 +21,8 @@ |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.IndexInput; |
| import java.io.IOException; |
| +import java.util.List; |
| +import java.util.ArrayList; |
| |
| final class SegmentInfo { |
| public String name; // unique name in dir |
| @@ -50,6 +52,9 @@ |
| // and true for newly created merged segments (both |
| // compound and non compound). |
| |
| + private List files; // cached list of files that this segment uses |
| + // in the Directory |
| + |
| public SegmentInfo(String name, int docCount, Directory dir) { |
| this.name = name; |
| this.docCount = docCount; |
| @@ -71,6 +76,7 @@ |
| * Copy everything from src SegmentInfo into our instance. |
| */ |
| void reset(SegmentInfo src) { |
| + files = null; |
| name = src.name; |
| docCount = src.docCount; |
| dir = src.dir; |
| @@ -134,7 +140,7 @@ |
| |
| if (!preLockless) { |
| // This is a FORMAT_LOCKLESS segment, which means |
| - // there are no norms: |
| + // there are no separate norms: |
| for(int i=0;i<numFields;i++) { |
| normGen[i] = -1; |
| } |
| @@ -174,10 +180,12 @@ |
| } else { |
| delGen++; |
| } |
| + files = null; |
| } |
| |
| void clearDelGen() { |
| delGen = -1; |
| + files = null; |
| } |
| |
| public Object clone () { |
| @@ -199,7 +207,7 @@ |
| return null; |
| } else { |
| // If delGen is 0, it's the pre-lockless-commit file format |
| - return IndexFileNames.fileNameFromGeneration(name, ".del", delGen); |
| + return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); |
| } |
| } |
| |
| @@ -280,6 +288,7 @@ |
| } else { |
| normGen[fieldIndex]++; |
| } |
| + files = null; |
| } |
| |
| /** |
| @@ -326,6 +335,7 @@ |
| } else { |
| this.isCompoundFile = -1; |
| } |
| + files = null; |
| } |
| |
| /** |
| @@ -338,7 +348,7 @@ |
| } else if (isCompoundFile == 1) { |
| return true; |
| } else { |
| - return dir.fileExists(name + ".cfs"); |
| + return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); |
| } |
| } |
| |
| @@ -361,4 +371,87 @@ |
| } |
| output.writeByte(isCompoundFile); |
| } |
| + |
| + /* |
| + * Return all files referenced by this SegmentInfo. The |
| + * returned List is cached locally, so you should not |
| + * modify it. |
| + */ |
| + |
| + public List files() throws IOException { |
| + |
| + if (files != null) { |
| + // Already cached: |
| + return files; |
| + } |
| + |
| + files = new ArrayList(); |
| + |
| + boolean useCompoundFile = getUseCompoundFile(); |
| + |
| + if (useCompoundFile) { |
| + files.add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); |
| + } else { |
| + for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) { |
| + String ext = IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]; |
| + String fileName = name + "." + ext; |
| + if (dir.fileExists(fileName)) { |
| + files.add(fileName); |
| + } |
| + } |
| + } |
| + |
| + String delFileName = IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); |
| + if (delFileName != null && (delGen > 0 || dir.fileExists(delFileName))) { |
| + files.add(delFileName); |
| + } |
| + |
| + // Careful logic for norms files: |
| + if (normGen != null) { |
| + for(int i=0;i<normGen.length;i++) { |
| + long gen = normGen[i]; |
| + if (gen > 0) { |
| + // Definitely a separate norm file, with generation: |
| + files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); |
| + } else if (-1 == gen) { |
| + // No separate norms but maybe non-separate norms |
| + // in the non compound file case: |
| + if (!hasSingleNormFile && !useCompoundFile) { |
| + String fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; |
| + if (dir.fileExists(fileName)) { |
| + files.add(fileName); |
| + } |
| + } |
| + } else if (0 == gen) { |
| + // Pre-2.1: we have to check file existence |
| + String fileName = null; |
| + if (useCompoundFile) { |
| + fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; |
| + } else if (!hasSingleNormFile) { |
| + fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; |
| + } |
| + if (fileName != null && dir.fileExists(fileName)) { |
| + files.add(fileName); |
| + } |
| + } |
| + } |
| + } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) { |
| + // Pre-2.1: we have to scan the dir to find all |
| + // matching _X.sN/_X.fN files for our segment: |
| + String prefix; |
| + if (useCompoundFile) |
| + prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; |
| + else |
| + prefix = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION; |
| + int prefixLength = prefix.length(); |
| + String[] allFiles = dir.list(); |
| + for(int i=0;i<allFiles.length;i++) { |
| + String fileName = allFiles[i]; |
| + if (fileName.length() > prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) { |
| + files.add(fileName); |
| + } |
| + } |
| + } |
| + return files; |
| + } |
| } |
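| |
| The files() logic above hinges on a three-way convention for delGen and each |
| normGen[i]. A small helper restating that convention (illustrative only, not |
| part of the patch): |
| |
| public class GenerationConvention { |
|   // Mirrors the cases files() distinguishes for delGen/normGen[i]: |
|   static String describe(long gen) { |
|     if (gen == -1) { |
|       return "no separate file for this segment"; |
|     } else if (gen == 0) { |
|       return "pre-lockless format: must check dir.fileExists()"; |
|     } else { |
|       return "separate file definitely exists; its name carries gen " + gen; |
|     } |
|   } |
| |
|   public static void main(String[] args) { |
|     long[] gens = { -1, 0, 3 }; |
|     for (int i = 0; i < gens.length; i++) { |
|       System.out.println(gens[i] + " -> " + describe(gens[i])); |
|     } |
|   } |
| } |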
| Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexFileNameFilter.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (working copy) |
| @@ -31,12 +31,17 @@ |
| |
| static IndexFileNameFilter singleton = new IndexFileNameFilter(); |
| private HashSet extensions; |
| + private HashSet extensionsInCFS; |
| |
| public IndexFileNameFilter() { |
| extensions = new HashSet(); |
| for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { |
| extensions.add(IndexFileNames.INDEX_EXTENSIONS[i]); |
| } |
| + extensionsInCFS = new HashSet(); |
| + for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) { |
| + extensionsInCFS.add(IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]); |
| + } |
| } |
| |
| /* (non-Javadoc) |
| @@ -72,10 +77,7 @@ |
| int i = name.lastIndexOf('.'); |
| if (i != -1) { |
| String extension = name.substring(1+i); |
| - if (extensions.contains(extension) && |
| - !extension.equals("del") && |
| - !extension.equals("gen") && |
| - !extension.equals("cfs")) { |
| + if (extensionsInCFS.contains(extension)) { |
| return true; |
| } |
| if (extension.startsWith("f") && |
| Index: src/java/org/apache/lucene/index/SegmentReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/SegmentReader.java (revision 515500) |
| +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) |
| @@ -77,15 +77,6 @@ |
| |
| private void reWrite(SegmentInfo si) throws IOException { |
| // NOTE: norms are re-written in regular directory, not cfs |
| - |
| - String oldFileName = si.getNormFileName(this.number); |
| - if (oldFileName != null && !oldFileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) { |
| - // Mark this file for deletion. Note that we don't |
| - // actually try to delete it until the new segments files is |
| - // successfully written: |
| - deleter.addPendingFile(oldFileName); |
| - } |
| - |
| si.advanceNormGen(this.number); |
| IndexOutput out = directory().createOutput(si.getNormFileName(this.number)); |
| try { |
| @@ -227,14 +218,6 @@ |
| |
| protected void doCommit() throws IOException { |
| if (deletedDocsDirty) { // re-write deleted |
| - String oldDelFileName = si.getDelFileName(); |
| - if (oldDelFileName != null) { |
| - // Mark this file for deletion. Note that we don't |
| - // actually try to delete it until the new segments files is |
| - // successfully written: |
| - deleter.addPendingFile(oldDelFileName); |
| - } |
| - |
| si.advanceDelGen(); |
| |
| // We can write directly to the actual name (vs to a |
| @@ -243,13 +226,6 @@ |
| deletedDocs.write(directory(), si.getDelFileName()); |
| } |
| if (undeleteAll && si.hasDeletions()) { |
| - String oldDelFileName = si.getDelFileName(); |
| - if (oldDelFileName != null) { |
| - // Mark this file for deletion. Note that we don't |
| - // actually try to delete it until the new segments files is |
| - // successfully written: |
| - deleter.addPendingFile(oldDelFileName); |
| - } |
| si.clearDelGen(); |
| } |
| if (normsDirty) { // re-write norms |
| @@ -320,37 +296,7 @@ |
| } |
| |
| Vector files() throws IOException { |
| - Vector files = new Vector(16); |
| - |
| - if (si.getUseCompoundFile()) { |
| - String name = segment + ".cfs"; |
| - if (directory().fileExists(name)) { |
| - files.addElement(name); |
| - } |
| - } else { |
| - for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { |
| - String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i]; |
| - if (directory().fileExists(name)) |
| - files.addElement(name); |
| - } |
| - } |
| - |
| - if (si.hasDeletions()) { |
| - files.addElement(si.getDelFileName()); |
| - } |
| - |
| - boolean addedNrm = false; |
| - for (int i = 0; i < fieldInfos.size(); i++) { |
| - String name = si.getNormFileName(i); |
| - if (name != null && directory().fileExists(name)) { |
| - if (name.endsWith("." + IndexFileNames.NORMS_EXTENSION)) { |
| - if (addedNrm) continue; // add .nrm just once |
| - addedNrm = true; |
| - } |
| - files.addElement(name); |
| - } |
| - } |
| - return files; |
| + return new Vector(si.files()); |
| } |
| |
| public TermEnum terms() { |
| Index: src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java (revision 0) |
| +++ src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java (revision 0) |
| @@ -0,0 +1,50 @@ |
| +package org.apache.lucene.index; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.util.List; |
| + |
| +/** |
| + * This {@link IndexDeletionPolicy} implementation keeps |
| + * only the most recent commit, immediately removing all |
| + * prior commits once a new commit is done. This is |
| + * the default deletion policy. |
| + */ |
| + |
| +public final class KeepOnlyLastCommitDeletionPolicy implements IndexDeletionPolicy { |
| + |
| + /** |
| + * Deletes all commits except the most recent one. |
| + */ |
| + public void onInit(List commits) { |
| + // Note that commits.size() should normally be 1: |
| + onCommit(commits); |
| + } |
| + |
| + /** |
| + * Deletes all commits except the most recent one. |
| + */ |
| + public void onCommit(List commits) { |
| + // Note that commits.size() should normally be 2 (if not |
| + // called by onInit above): |
| + int size = commits.size(); |
| + for(int i=0;i<size-1;i++) { |
| + ((IndexCommitPoint) commits.get(i)).delete(); |
| + } |
| + } |
| +} |
| |
| Property changes on: src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java |
| ___________________________________________________________________ |
| Name: svn:eol-style |
| + native |
| |
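| To take effect, a policy has to reach the IndexFileDeleter, which consults it |
| via onInit/onCommit as shown earlier. A sketch of the wiring (the IndexWriter |
| constructor overload taking a policy is an assumption here, since this |
| excerpt only shows IndexFileDeleter receiving one): |
| |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.index.IndexDeletionPolicy; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| public class PolicyWiringSketch { |
|   public static void main(String[] args) throws Exception { |
|     IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy(); |
|     // Assumed overload: (Directory, autoCommit, Analyzer, create, policy). |
|     IndexWriter writer = new IndexWriter(new RAMDirectory(), false, |
|                                          new WhitespaceAnalyzer(), |
|                                          true, policy); |
|     writer.close(); |
|   } |
| } |
| |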
| Index: src/site/src/documentation/content/xdocs/fileformats.xml |
| =================================================================== |
| --- src/site/src/documentation/content/xdocs/fileformats.xml (revision 515500) |
| +++ src/site/src/documentation/content/xdocs/fileformats.xml (working copy) |
| @@ -771,7 +771,9 @@ |
| generation is the active one (when older |
| segments_N files are present it's because they |
| temporarily cannot be deleted, or, a writer is in |
| - the process of committing). This file lists each |
| + the process of committing, or a custom |
| + <a href="http://lucene.apache.org/java/docs/api/org/apache/lucene/index/IndexDeletionPolicy.html">IndexDeletionPolicy</a> |
| + is in use). This file lists each |
| segment by name, has details about the separate |
| norms and deletion files, and also contains the |
| size of each segment. |