| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Random; |
| |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CachingTokenFilter; |
| import org.apache.lucene.analysis.SimpleAnalyzer; |
| import org.apache.lucene.analysis.StopAnalyzer; |
| import org.apache.lucene.analysis.TeeSinkTokenFilter; |
| import org.apache.lucene.analysis.TokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.analysis.WhitespaceTokenizer; |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.analysis.standard.StandardTokenizer; |
| import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.PhraseQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.Lock; |
| import org.apache.lucene.store.LockFactory; |
| import org.apache.lucene.store.MockRAMDirectory; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.store.SingleInstanceLockFactory; |
| import org.apache.lucene.util.UnicodeUtil; |
| import org.apache.lucene.util._TestUtil; |
| import org.apache.lucene.util.Version; |
| import org.apache.lucene.util.ThreadInterruptedException; |
| |
| public class TestIndexWriter extends LuceneTestCase { |
| public TestIndexWriter(String name) { |
| super(name); |
| } |
| |
| public void testDocCount() throws IOException |
| { |
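| // Note: numDocs() reflects deletions immediately, while maxDoc() keeps counting deleted docs until segments are merged away (e.g. by optimize()). |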
| Directory dir = new RAMDirectory(); |
| |
| IndexWriter writer = null; |
| IndexReader reader = null; |
| int i; |
| |
| long savedWriteLockTimeout = IndexWriter.getDefaultWriteLockTimeout(); |
| try { |
| IndexWriter.setDefaultWriteLockTimeout(2000); |
| assertEquals(2000, IndexWriter.getDefaultWriteLockTimeout()); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| |
| } finally { |
| IndexWriter.setDefaultWriteLockTimeout(savedWriteLockTimeout); |
| } |
| |
| // add 100 documents |
| for (i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| assertEquals(100, writer.maxDoc()); |
| writer.close(); |
| |
| // delete 40 documents |
| reader = IndexReader.open(dir, false); |
| for (i = 0; i < 40; i++) { |
| reader.deleteDocument(i); |
| } |
| reader.close(); |
| |
| // test doc count before segments are merged/index is optimized |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| assertEquals(100, writer.maxDoc()); |
| writer.close(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(100, reader.maxDoc()); |
| assertEquals(60, reader.numDocs()); |
| reader.close(); |
| |
| // optimize the index and check that the new doc count is correct |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| assertEquals(100, writer.maxDoc()); |
| assertEquals(60, writer.numDocs()); |
| writer.optimize(); |
| assertEquals(60, writer.maxDoc()); |
| assertEquals(60, writer.numDocs()); |
| writer.close(); |
| |
| // check that the index reader gives the same numbers. |
| reader = IndexReader.open(dir, true); |
| assertEquals(60, reader.maxDoc()); |
| assertEquals(60, reader.numDocs()); |
| reader.close(); |
| |
| // make sure opening a new index for create over |
| // this existing one works correctly: |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| assertEquals(0, writer.maxDoc()); |
| assertEquals(0, writer.numDocs()); |
| writer.close(); |
| } |
| |
| private static void addDoc(IndexWriter writer) throws IOException |
| { |
| Document doc = new Document(); |
| doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| private void addDocWithIndex(IndexWriter writer, int index) throws IOException |
| { |
| Document doc = new Document(); |
| doc.add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("id", "" + index, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| /* |
| Test: make sure when we run out of disk space or hit |
| random IOExceptions in any of the addIndexesNoOptimize(*) calls |
| that 1) index is not corrupt (searcher can open/search |
| it) and 2) transactional semantics are followed: |
| either all or none of the incoming documents were in |
| fact added. |
| */ |
| public void testAddIndexOnDiskFull() throws IOException |
| { |
| int START_COUNT = 57; |
| int NUM_DIR = 50; |
| int END_COUNT = START_COUNT + NUM_DIR*25; |
| |
| boolean debug = false; |
| |
| // Build up a bunch of dirs that have indexes which we |
| // will then merge together by calling addIndexesNoOptimize(*): |
| Directory[] dirs = new Directory[NUM_DIR]; |
| long inputDiskUsage = 0; |
| for(int i=0;i<NUM_DIR;i++) { |
| dirs[i] = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for(int j=0;j<25;j++) { |
| addDocWithIndex(writer, 25*i+j); |
| } |
| writer.close(); |
| String[] files = dirs[i].listAll(); |
| for(int j=0;j<files.length;j++) { |
| inputDiskUsage += dirs[i].fileLength(files[j]); |
| } |
| } |
| |
| // Now, build a starting index that has START_COUNT docs. We |
| // will then try to addIndexesNoOptimize into a copy of this: |
| RAMDirectory startDir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for(int j=0;j<START_COUNT;j++) { |
| addDocWithIndex(writer, j); |
| } |
| writer.close(); |
| |
| // Make sure starting index seems to be working properly: |
| Term searchTerm = new Term("content", "aaa"); |
| IndexReader reader = IndexReader.open(startDir, true); |
| assertEquals("first docFreq", 57, reader.docFreq(searchTerm)); |
| |
| IndexSearcher searcher = new IndexSearcher(reader); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("first number of hits", 57, hits.length); |
| searcher.close(); |
| reader.close(); |
| |
| // Iterate with larger and larger amounts of free |
| // disk space. With little free disk space, |
| // addIndexesNoOptimize will certainly run out of space & |
| // fail. Verify that when this happens, index is |
| // not corrupt and index in fact has added no |
| // documents. Then, we increase disk space by 2000 |
| // bytes each iteration. At some point there is |
| // enough free disk space and addIndexesNoOptimize should |
| // succeed and index should show all documents were |
| // added. |
| |
| // String[] files = startDir.listAll(); |
| long diskUsage = startDir.sizeInBytes(); |
| |
| long startDiskUsage = 0; |
| String[] files = startDir.listAll(); |
| for(int i=0;i<files.length;i++) { |
| startDiskUsage += startDir.fileLength(files[i]); |
| } |
| |
| for(int iter=0;iter<3;iter++) { |
| |
| if (debug) |
| System.out.println("TEST: iter=" + iter); |
| |
| // Start with 100 bytes more than we are currently using: |
| long diskFree = diskUsage+100; |
| |
| int method = iter; |
| |
| boolean success = false; |
| boolean done = false; |
| |
| String methodName; |
| if (0 == method) { |
| methodName = "addIndexes(Directory[]) + optimize()"; |
| } else if (1 == method) { |
| methodName = "addIndexes(IndexReader[])"; |
| } else { |
| methodName = "addIndexesNoOptimize(Directory[])"; |
| } |
| |
| while(!done) { |
| |
| // Make a new dir that will enforce disk usage: |
| MockRAMDirectory dir = new MockRAMDirectory(startDir); |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); |
| IOException err = null; |
| |
| MergeScheduler ms = writer.getMergeScheduler(); |
| for(int x=0;x<2;x++) { |
| if (ms instanceof ConcurrentMergeScheduler) |
| // This test intentionally produces exceptions |
| // in the threads that CMS launches; we don't |
| // want to pollute test output with these. |
| if (0 == x) |
| ((ConcurrentMergeScheduler) ms).setSuppressExceptions(); |
| else |
| ((ConcurrentMergeScheduler) ms).clearSuppressExceptions(); |
| |
| // Two loops: first time, limit disk space & |
| // throw random IOExceptions; second time, no |
| // disk space limit: |
| |
| double rate = 0.05; |
| double diskRatio = ((double) diskFree)/diskUsage; |
| long thisDiskFree; |
| |
| String testName = null; |
| |
| if (0 == x) { |
| thisDiskFree = diskFree; |
| if (diskRatio >= 2.0) { |
| rate /= 2; |
| } |
| if (diskRatio >= 4.0) { |
| rate /= 2; |
| } |
| if (diskRatio >= 6.0) { |
| rate = 0.0; |
| } |
| if (debug) |
| testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; |
| } else { |
| thisDiskFree = 0; |
| rate = 0.0; |
| if (debug) |
| testName = "disk full test " + methodName + " with unlimited disk space"; |
| } |
| |
| if (debug) |
| System.out.println("\ncycle: " + testName); |
| |
| dir.setMaxSizeInBytes(thisDiskFree); |
| dir.setRandomIOExceptionRate(rate, diskFree); |
| |
| try { |
| |
| if (0 == method) { |
| writer.addIndexesNoOptimize(dirs); |
| writer.optimize(); |
| } else if (1 == method) { |
| IndexReader readers[] = new IndexReader[dirs.length]; |
| for(int i=0;i<dirs.length;i++) { |
| readers[i] = IndexReader.open(dirs[i], true); |
| } |
| try { |
| writer.addIndexes(readers); |
| } finally { |
| for(int i=0;i<dirs.length;i++) { |
| readers[i].close(); |
| } |
| } |
| } else { |
| writer.addIndexesNoOptimize(dirs); |
| } |
| |
| success = true; |
| if (debug) { |
| System.out.println(" success!"); |
| } |
| |
| if (0 == x) { |
| done = true; |
| } |
| |
| } catch (IOException e) { |
| success = false; |
| err = e; |
| if (debug) { |
| System.out.println(" hit IOException: " + e); |
| e.printStackTrace(System.out); |
| } |
| |
| if (1 == x) { |
| e.printStackTrace(System.out); |
| fail(methodName + " hit IOException after disk space was freed up"); |
| } |
| } |
| |
| // Make sure all threads from |
| // ConcurrentMergeScheduler are done |
| _TestUtil.syncConcurrentMerges(writer); |
| |
| if (debug) { |
| System.out.println(" now test readers"); |
| } |
| |
| // Finally, verify index is not corrupt, and, if |
| // we succeeded, we see all docs added, and if we |
| // failed, we see either all docs or no docs added |
| // (transactional semantics): |
| try { |
| reader = IndexReader.open(dir, true); |
| } catch (IOException e) { |
| e.printStackTrace(System.out); |
| fail(testName + ": exception when creating IndexReader: " + e); |
| } |
| int result = reader.docFreq(searchTerm); |
| if (success) { |
| if (result != START_COUNT) { |
| fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT); |
| } |
| } else { |
| // On hitting exception we still may have added |
| // all docs: |
| if (result != START_COUNT && result != END_COUNT) { |
| err.printStackTrace(System.out); |
| fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); |
| } |
| } |
| |
| searcher = new IndexSearcher(reader); |
| try { |
| hits = searcher.search(new TermQuery(searchTerm), null, END_COUNT).scoreDocs; |
| } catch (IOException e) { |
| e.printStackTrace(System.out); |
| fail(testName + ": exception when searching: " + e); |
| } |
| int result2 = hits.length; |
| if (success) { |
| if (result2 != result) { |
| fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); |
| } |
| } else { |
| // On hitting exception we still may have added |
| // all docs: |
| if (result2 != result) { |
| err.printStackTrace(System.out); |
| fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); |
| } |
| } |
| |
| searcher.close(); |
| reader.close(); |
| if (debug) { |
| System.out.println(" count is " + result); |
| } |
| |
| if (done || result == END_COUNT) { |
| break; |
| } |
| } |
| |
| if (debug) { |
| System.out.println(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.getMaxUsedSizeInBytes()); |
| } |
| |
| if (done) { |
| // Javadocs state that temp free Directory space |
| // required is at most 2X total input size of |
| // indices so let's make sure: |
| assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName + |
| ": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " + |
| "starting disk usage = " + startDiskUsage + " bytes; " + |
| "input index disk usage = " + inputDiskUsage + " bytes", |
| (dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage)); |
| } |
| |
| // Make sure we don't hit disk full during close below: |
| dir.setMaxSizeInBytes(0); |
| dir.setRandomIOExceptionRate(0.0, 0); |
| |
| writer.close(); |
| |
| // Wait for all BG threads to finish else |
| // dir.close() will throw IOException because |
| // there are still open files |
| _TestUtil.syncConcurrentMerges(ms); |
| |
| dir.close(); |
| |
| // Try again with 5000 more bytes of free space: |
| diskFree += 5000; |
| } |
| } |
| |
| startDir.close(); |
| } |
| |
| /* |
| * Make sure IndexWriter cleans up on hitting a disk |
| * full exception in addDocument. |
| */ |
| public void testAddDocumentOnDiskFull() throws IOException { |
| |
| boolean debug = false; |
| |
| for(int pass=0;pass<2;pass++) { |
| if (debug) |
| System.out.println("TEST: pass=" + pass); |
| boolean doAbort = pass == 1; |
| long diskFree = 200; |
| while(true) { |
| if (debug) |
| System.out.println("TEST: cycle: diskFree=" + diskFree); |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| dir.setMaxSizeInBytes(diskFree); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| |
| MergeScheduler ms = writer.getMergeScheduler(); |
| if (ms instanceof ConcurrentMergeScheduler) |
| // This test intentionally produces exceptions |
| // in the threads that CMS launches; we don't |
| // want to pollute test output with these. |
| ((ConcurrentMergeScheduler) ms).setSuppressExceptions(); |
| |
| boolean hitError = false; |
| try { |
| for(int i=0;i<200;i++) { |
| addDoc(writer); |
| } |
| } catch (IOException e) { |
| if (debug) { |
| System.out.println("TEST: exception on addDoc"); |
| e.printStackTrace(System.out); |
| } |
| hitError = true; |
| } |
| |
| if (hitError) { |
| if (doAbort) { |
| writer.rollback(); |
| } else { |
| try { |
| writer.close(); |
| } catch (IOException e) { |
| if (debug) { |
| System.out.println("TEST: exception on close"); |
| e.printStackTrace(System.out); |
| } |
| dir.setMaxSizeInBytes(0); |
| writer.close(); |
| } |
| } |
| |
| _TestUtil.syncConcurrentMerges(ms); |
| |
| assertNoUnreferencedFiles(dir, "after disk full during addDocument"); |
| |
| // Make sure reader can open the index: |
| IndexReader.open(dir, true).close(); |
| |
| dir.close(); |
| |
| // Now try again w/ more space: |
| diskFree += 500; |
| } else { |
| _TestUtil.syncConcurrentMerges(writer); |
| dir.close(); |
| break; |
| } |
| } |
| } |
| } |
| |
| public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { |
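| // Instantiating IndexFileDeleter removes any files not referenced by the last commit; verify the Directory listing is unchanged, i.e. no unreferenced files existed. |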
| String[] startFiles = dir.listAll(); |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null); |
| String[] endFiles = dir.listAll(); |
| |
| Arrays.sort(startFiles); |
| Arrays.sort(endFiles); |
| |
| if (!Arrays.equals(startFiles, endFiles)) { |
| fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles)); |
| } |
| } |
| |
| /** |
| * Make sure we skip wicked long terms. |
| */ |
| public void testWickedLongTerm() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE-1]; |
| Arrays.fill(chars, 'x'); |
| Document doc = new Document(); |
| final String bigTerm = new String(chars); |
| |
| // Max length term is 16383, so this content produces |
| // a too-long term: |
| String contents = "abc xyz x" + bigTerm + " another term"; |
| doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| |
| // Make sure we can add another normal document |
| doc = new Document(); |
| doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| |
| // Make sure all terms < max size were indexed |
| assertEquals(2, reader.docFreq(new Term("content", "abc"))); |
| assertEquals(1, reader.docFreq(new Term("content", "bbb"))); |
| assertEquals(1, reader.docFreq(new Term("content", "term"))); |
| assertEquals(1, reader.docFreq(new Term("content", "another"))); |
| |
| // Make sure position is still incremented when |
| // massive term is skipped: |
| TermPositions tps = reader.termPositions(new Term("content", "another")); |
| assertTrue(tps.next()); |
| assertEquals(1, tps.freq()); |
| assertEquals(3, tps.nextPosition()); |
| |
| // Make sure the doc that has the massive term is in |
| // the index: |
| assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); |
| |
| reader.close(); |
| |
| // Make sure we can add a document with exactly the |
| // maximum length term, and search on that term: |
| doc = new Document(); |
| doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED)); |
| StandardAnalyzer sa = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT); |
| sa.setMaxTokenLength(100000); |
| writer = new IndexWriter(dir, sa, IndexWriter.MaxFieldLength.LIMITED); |
| writer.addDocument(doc); |
| writer.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(1, reader.docFreq(new Term("content", bigTerm))); |
| reader.close(); |
| |
| dir.close(); |
| } |
| |
| public void testOptimizeMaxNumSegments() throws IOException { |
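| // Build indexes of increasing size, call optimize(3), and verify the result has at most 3 segments (or is unchanged if it already had fewer). |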
| |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| |
| final Document doc = new Document(); |
| doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| |
| for(int numDocs=38;numDocs<500;numDocs += 38) { |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| LogDocMergePolicy ldmp = new LogDocMergePolicy(); |
| ldmp.setMinMergeDocs(1); |
| writer.setMergePolicy(ldmp); |
| writer.setMergeFactor(5); |
| writer.setMaxBufferedDocs(2); |
| for(int j=0;j<numDocs;j++) |
| writer.addDocument(doc); |
| writer.close(); |
| |
| SegmentInfos sis = new SegmentInfos(); |
| sis.read(dir); |
| final int segCount = sis.size(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| ldmp = new LogDocMergePolicy(); |
| ldmp.setMinMergeDocs(1); |
| writer.setMergePolicy(ldmp); |
| writer.setMergeFactor(5); |
| writer.optimize(3); |
| writer.close(); |
| |
| sis = new SegmentInfos(); |
| sis.read(dir); |
| final int optSegCount = sis.size(); |
| |
| if (segCount < 3) |
| assertEquals(segCount, optSegCount); |
| else |
| assertEquals(3, optSegCount); |
| } |
| } |
| |
| public void testOptimizeMaxNumSegments2() throws IOException { |
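| // Like the test above, but reuses a single open writer across iterations and optimizes down to at most 7 segments each time. |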
| MockRAMDirectory dir = new MockRAMDirectory(); |
| |
| final Document doc = new Document(); |
| doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| LogDocMergePolicy ldmp = new LogDocMergePolicy(); |
| ldmp.setMinMergeDocs(1); |
| writer.setMergePolicy(ldmp); |
| writer.setMergeFactor(4); |
| writer.setMaxBufferedDocs(2); |
| |
| for(int iter=0;iter<10;iter++) { |
| for(int i=0;i<19;i++) |
| writer.addDocument(doc); |
| |
| ((ConcurrentMergeScheduler) writer.getMergeScheduler()).sync(); |
| writer.commit(); |
| |
| SegmentInfos sis = new SegmentInfos(); |
| sis.read(dir); |
| |
| final int segCount = sis.size(); |
| |
| writer.optimize(7); |
| writer.commit(); |
| |
| sis = new SegmentInfos(); |
| ((ConcurrentMergeScheduler) writer.getMergeScheduler()).sync(); |
| sis.read(dir); |
| final int optSegCount = sis.size(); |
| |
| if (segCount < 7) |
| assertEquals(segCount, optSegCount); |
| else |
| assertEquals(7, optSegCount); |
| } |
| } |
| |
| /** |
| * Make sure optimize doesn't use any more than 1X |
| * starting index size as its temporary free space |
| * required. |
| */ |
| public void testOptimizeTempSpaceUsage() throws IOException { |
| |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for(int j=0;j<500;j++) { |
| addDocWithIndex(writer, j); |
| } |
| writer.close(); |
| |
| long startDiskUsage = 0; |
| String[] files = dir.listAll(); |
| for(int i=0;i<files.length;i++) { |
| startDiskUsage += dir.fileLength(files[i]); |
| } |
| |
| dir.resetMaxUsedSizeInBytes(); |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.optimize(); |
| writer.close(); |
| long maxDiskUsage = dir.getMaxUsedSizeInBytes(); |
| |
| assertTrue("optimized used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (2*startDiskUsage) + " (= 2X starting usage)", |
| maxDiskUsage <= 2*startDiskUsage); |
| dir.close(); |
| } |
| |
| static String arrayToString(String[] l) { |
| String s = ""; |
| for(int i=0;i<l.length;i++) { |
| if (i > 0) { |
| s += "\n "; |
| } |
| s += l[i]; |
| } |
| return s; |
| } |
| |
| // Make sure we can open an index for create even when a |
| // reader holds it open (this fails pre lock-less |
| // commits on windows): |
| public void testCreateWithReader() throws IOException { |
| File indexDir = _TestUtil.getTempDir("lucenetestindexwriter"); |
| |
| try { |
| Directory dir = FSDirectory.open(indexDir); |
| |
| // add one document & close writer |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| addDoc(writer); |
| writer.close(); |
| |
| // now open reader: |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals("should be one document", reader.numDocs(), 1); |
| |
| // now open index for create: |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| assertEquals("should be zero documents", writer.maxDoc(), 0); |
| addDoc(writer); |
| writer.close(); |
| |
| assertEquals("should be one document", reader.numDocs(), 1); |
| IndexReader reader2 = IndexReader.open(dir, true); |
| assertEquals("should be one document", reader2.numDocs(), 1); |
| reader.close(); |
| reader2.close(); |
| } finally { |
| rmDir(indexDir); |
| } |
| } |
| |
| // Simulate a writer that crashed while writing segments |
| // file: make sure we can still open the index (ie, |
| // gracefully fallback to the previous segments file), |
| // and that we can add to the index: |
| public void testSimulatedCrashedWriter() throws IOException { |
| Directory dir = new RAMDirectory(); |
| |
| IndexWriter writer = null; |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| // add 100 documents |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| |
| // close |
| writer.close(); |
| |
| long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| assertTrue("segment generation should be > 1 but got " + gen, gen > 1); |
| |
| // Make the next segments file, with last byte |
| // missing, to simulate a writer that crashed while |
| // writing segments file: |
| String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); |
| String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, |
| "", |
| 1+gen); |
| IndexInput in = dir.openInput(fileNameIn); |
| IndexOutput out = dir.createOutput(fileNameOut); |
| long length = in.length(); |
| for(int i=0;i<length-1;i++) { |
| out.writeByte(in.readByte()); |
| } |
| in.close(); |
| out.close(); |
| |
| IndexReader reader = null; |
| try { |
| reader = IndexReader.open(dir, true); |
| } catch (Exception e) { |
| fail("reader failed to open on a crashed index"); |
| } |
| reader.close(); |
| |
| try { |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| } catch (Exception e) { |
| fail("writer failed to open on a crashed index"); |
| } |
| |
| // add 100 documents |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| |
| // close |
| writer.close(); |
| } |
| |
| // Simulate a corrupt index by removing last byte of |
| // latest segments file and make sure we get an |
| // IOException trying to open the index: |
| public void testSimulatedCorruptIndex1() throws IOException { |
| Directory dir = new RAMDirectory(); |
| |
| IndexWriter writer = null; |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| // add 100 documents |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| |
| // close |
| writer.close(); |
| |
| long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| assertTrue("segment generation should be > 1 but got " + gen, gen > 1); |
| |
| String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); |
| String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, |
| "", |
| 1+gen); |
| IndexInput in = dir.openInput(fileNameIn); |
| IndexOutput out = dir.createOutput(fileNameOut); |
| long length = in.length(); |
| for(int i=0;i<length-1;i++) { |
| out.writeByte(in.readByte()); |
| } |
| in.close(); |
| out.close(); |
| dir.deleteFile(fileNameIn); |
| |
| IndexReader reader = null; |
| try { |
| reader = IndexReader.open(dir, true); |
| fail("reader did not hit IOException on opening a corrupt index"); |
| } catch (Exception e) { |
| } |
| if (reader != null) { |
| reader.close(); |
| } |
| } |
| |
| public void testChangesAfterClose() throws IOException { |
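| // Adding a document after close() must throw AlreadyClosedException. |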
| Directory dir = new RAMDirectory(); |
| |
| IndexWriter writer = null; |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| addDoc(writer); |
| |
| // close |
| writer.close(); |
| try { |
| addDoc(writer); |
| fail("did not hit AlreadyClosedException"); |
| } catch (AlreadyClosedException e) { |
| // expected |
| } |
| } |
| |
| |
| // Simulate a corrupt index by removing one of the cfs |
| // files and make sure we get an IOException trying to |
| // open the index: |
| public void testSimulatedCorruptIndex2() throws IOException { |
| Directory dir = new RAMDirectory(); |
| |
| IndexWriter writer = null; |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| // add 100 documents |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| |
| // close |
| writer.close(); |
| |
| long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| assertTrue("segment generation should be > 1 but got " + gen, gen > 1); |
| |
| String[] files = dir.listAll(); |
| for(int i=0;i<files.length;i++) { |
| if (files[i].endsWith(".cfs")) { |
| dir.deleteFile(files[i]); |
| break; |
| } |
| } |
| |
| IndexReader reader = null; |
| try { |
| reader = IndexReader.open(dir, true); |
| fail("reader did not hit IOException on opening a corrupt index"); |
| } catch (Exception e) { |
| } |
| if (reader != null) { |
| reader.close(); |
| } |
| } |
| |
| /* |
| * Simple test for "commit on close": open writer then |
| * add a bunch of docs, making sure reader does not see |
| * these docs until writer is closed. |
| */ |
| public void testCommitOnClose() throws IOException { |
| Directory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for (int i = 0; i < 14; i++) { |
| addDoc(writer); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("content", "aaa"); |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("first number of hits", 14, hits.length); |
| searcher.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| for(int i=0;i<3;i++) { |
| for(int j=0;j<11;j++) { |
| addDoc(writer); |
| } |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader incorrectly sees changes from writer", 14, hits.length); |
| searcher.close(); |
| assertTrue("reader should have still been current", reader.isCurrent()); |
| } |
| |
| // Now, close the writer: |
| writer.close(); |
| assertFalse("reader should not be current now", reader.isCurrent()); |
| |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader did not see changes after writer was closed", 47, hits.length); |
| searcher.close(); |
| } |
| |
| /* |
| * Simple test for "commit on close": open writer, then |
| * add a bunch of docs, making sure reader does not see |
| * them until writer has closed. Then instead of |
| * closing the writer, call abort and verify reader sees |
| * nothing was added. Then verify we can open the index |
| * and add docs to it. |
| */ |
| public void testCommitOnCloseAbort() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| for (int i = 0; i < 14; i++) { |
| addDoc(writer); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("content", "aaa"); |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("first number of hits", 14, hits.length); |
| searcher.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| for(int j=0;j<17;j++) { |
| addDoc(writer); |
| } |
| // Delete all docs: |
| writer.deleteDocuments(searchTerm); |
| |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader incorrectly sees changes from writer", 14, hits.length); |
| searcher.close(); |
| |
| // Now, rollback the writer: |
| writer.rollback(); |
| |
| assertNoUnreferencedFiles(dir, "unreferenced files remain after rollback()"); |
| |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("saw changes after writer.abort", 14, hits.length); |
| searcher.close(); |
| |
| // Now make sure we can re-open the index, add docs, |
| // and all is good: |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| |
| // On abort, writer in fact may write to the same |
| // segments_N file: |
| dir.setPreventDoubleWrite(false); |
| |
| for(int i=0;i<12;i++) { |
| for(int j=0;j<17;j++) { |
| addDoc(writer); |
| } |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader incorrectly sees changes from writer", 14, hits.length); |
| searcher.close(); |
| } |
| |
| writer.close(); |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("didn't see changes after close", 218, hits.length); |
| searcher.close(); |
| |
| dir.close(); |
| } |
| |
| /* |
| * Verify that a writer with "commit on close" indeed |
| * cleans up the temp segments created after opening |
| * that are not referenced by the starting segments |
| * file. We check this by using MockRAMDirectory to |
| * measure max temp disk space used. |
| */ |
| public void testCommitOnCloseDiskUsage() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for(int j=0;j<30;j++) { |
| addDocWithIndex(writer, j); |
| } |
| writer.close(); |
| dir.resetMaxUsedSizeInBytes(); |
| |
| long startDiskUsage = dir.getMaxUsedSizeInBytes(); |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| writer.setMergeScheduler(new SerialMergeScheduler()); |
| for(int j=0;j<1470;j++) { |
| addDocWithIndex(writer, j); |
| } |
| long midDiskUsage = dir.getMaxUsedSizeInBytes(); |
| dir.resetMaxUsedSizeInBytes(); |
| writer.optimize(); |
| writer.close(); |
| |
| IndexReader.open(dir, true).close(); |
| |
| long endDiskUsage = dir.getMaxUsedSizeInBytes(); |
| |
| // Ending index is 50X as large as starting index; due |
| // to 2X disk usage normally we allow 100X max |
| // transient usage. If something is wrong w/ deleter |
| // and it doesn't delete intermediate segments then it |
| // will exceed this 100X: |
| // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage); |
| assertTrue("writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage, |
| midDiskUsage < 100*startDiskUsage); |
| assertTrue("writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage, |
| endDiskUsage < 100*startDiskUsage); |
| } |
| |
| |
| /* |
| * Verify that calling optimize when writer is open for |
| * "commit on close" works correctly both for rollback() |
| * and close(). |
| */ |
| public void testCommitOnCloseOptimize() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| for(int j=0;j<17;j++) { |
| addDocWithIndex(writer, j); |
| } |
| writer.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.optimize(); |
| |
| // Open a reader before closing (committing) the writer: |
| IndexReader reader = IndexReader.open(dir, true); |
| |
| // Reader should see index as unoptimized at this |
| // point: |
| assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized()); |
| reader.close(); |
| |
| // Abort the writer: |
| writer.rollback(); |
| assertNoUnreferencedFiles(dir, "aborted writer after optimize"); |
| |
| // Open a reader after aborting writer: |
| reader = IndexReader.open(dir, true); |
| |
| // Reader should still see index as unoptimized: |
| assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized()); |
| reader.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.optimize(); |
| writer.close(); |
| assertNoUnreferencedFiles(dir, "aborted writer after optimize"); |
| |
| // Open a reader after closing the writer: |
| reader = IndexReader.open(dir, true); |
| |
| // Reader should now see the index as optimized: |
| assertTrue("Reader incorrectly sees that the index is unoptimized", reader.isOptimized()); |
| reader.close(); |
| } |
| |
| public void testIndexNoDocuments() throws IOException { |
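| // Committing a writer that never added documents should still produce an index that opens cleanly with zero docs, both on create and on append. |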
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.commit(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.maxDoc()); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); |
| writer.commit(); |
| writer.close(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.maxDoc()); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| } |
| |
| public void testManyFields() throws IOException { |
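| // Each of the 100 docs gets six uniquely named fields ("a"+j .. "f"+j); verify every field/term combination was indexed. |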
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| for(int j=0;j<100;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("b"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(100, reader.maxDoc()); |
| assertEquals(100, reader.numDocs()); |
| for(int j=0;j<100;j++) { |
| assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j))); |
| assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j))); |
| assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j))); |
| assertEquals(1, reader.docFreq(new Term("d"+j, "aaa"))); |
| assertEquals(1, reader.docFreq(new Term("e"+j, "aaa"))); |
| assertEquals(1, reader.docFreq(new Term("f"+j, "aaa"))); |
| } |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testSmallRAMBuffer() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setRAMBufferSizeMB(0.000001); |
| int lastNumFile = dir.listAll().length; |
| for(int j=0;j<9;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| int numFile = dir.listAll().length; |
| // Verify that with a tiny RAM buffer we see new |
| // segment after every doc |
| assertTrue(numFile > lastNumFile); |
| lastNumFile = numFile; |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| // Make sure it's OK to change RAM buffer size and |
| // maxBufferedDocs in a write session |
| public void testChangingRAMBuffer() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| |
| int lastFlushCount = -1; |
| for(int j=1;j<52;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| _TestUtil.syncConcurrentMerges(writer); |
| int flushCount = writer.getFlushCount(); |
| if (j == 1) |
| lastFlushCount = flushCount; |
| else if (j < 10) |
| // No new flush should have occurred |
| assertEquals(flushCount, lastFlushCount); |
| else if (10 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| writer.setRAMBufferSizeMB(0.000001); |
| writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); |
| } else if (j < 20) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (20 == j) { |
| writer.setRAMBufferSizeMB(16); |
| writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 30) { |
| assertEquals(flushCount, lastFlushCount); |
| } else if (30 == j) { |
| writer.setRAMBufferSizeMB(0.000001); |
| writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); |
| } else if (j < 40) { |
| assertTrue(flushCount> lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (40 == j) { |
| writer.setMaxBufferedDocs(10); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 50) { |
| assertEquals(flushCount, lastFlushCount); |
| writer.setMaxBufferedDocs(10); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| } else if (50 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| public void testChangingRAMBuffer2() throws IOException { |
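| // Same as testChangingRAMBuffer, but triggers flushes via buffered delete terms instead of added documents. |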
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| writer.setMaxBufferedDeleteTerms(10); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| |
| for(int j=1;j<52;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| int lastFlushCount = -1; |
| for(int j=1;j<52;j++) { |
| writer.deleteDocuments(new Term("field", "aaa" + j)); |
| _TestUtil.syncConcurrentMerges(writer); |
| int flushCount = writer.getFlushCount(); |
| if (j == 1) |
| lastFlushCount = flushCount; |
| else if (j < 10) { |
| // No new flush should have occurred |
| assertEquals(flushCount, lastFlushCount); |
| } else if (10 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| writer.setRAMBufferSizeMB(0.000001); |
| writer.setMaxBufferedDeleteTerms(1); |
| } else if (j < 20) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (20 == j) { |
| writer.setRAMBufferSizeMB(16); |
| writer.setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 30) { |
| assertEquals(flushCount, lastFlushCount); |
| } else if (30 == j) { |
| writer.setRAMBufferSizeMB(0.000001); |
| writer.setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH); |
| writer.setMaxBufferedDeleteTerms(1); |
| } else if (j < 40) { |
| assertTrue(flushCount> lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (40 == j) { |
| writer.setMaxBufferedDeleteTerms(10); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 50) { |
| assertEquals(flushCount, lastFlushCount); |
| writer.setMaxBufferedDeleteTerms(10); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| } else if (50 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| public void testDiverseDocs() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setRAMBufferSizeMB(0.5); |
| Random rand = newRandom(); |
| for(int i=0;i<3;i++) { |
| // First, docs where every term is unique (heavy on |
| // Posting instances) |
| for(int j=0;j<100;j++) { |
| Document doc = new Document(); |
| for(int k=0;k<100;k++) { |
| doc.add(new Field("field", Integer.toString(rand.nextInt()), Field.Store.YES, Field.Index.ANALYZED)); |
| } |
| writer.addDocument(doc); |
| } |
| |
| // Next, many single term docs where only one term |
| // occurs (heavy on byte blocks) |
| for(int j=0;j<100;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| // Next, many single term docs where only one term |
| // occurs but the terms are very long (heavy on |
| // char[] arrays) |
| for(int j=0;j<100;j++) { |
| StringBuilder b = new StringBuilder(); |
| String x = Integer.toString(j) + "."; |
| for(int k=0;k<1000;k++) |
| b.append(x); |
| String longTerm = b.toString(); |
| |
| Document doc = new Document(); |
| doc.add(new Field("field", longTerm, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| } |
| writer.close(); |
| |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1000).scoreDocs; |
| assertEquals(300, hits.length); |
| searcher.close(); |
| |
| dir.close(); |
| } |
| |
| public void testEnablingNorms() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| // Enable norms for only 1 doc, pre flush |
| for(int j=0;j<10;j++) { |
| Document doc = new Document(); |
| Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); |
| if (j != 8) { |
| f.setOmitNorms(true); |
| } |
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("field", "aaa"); |
| |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals(10, hits.length); |
| searcher.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| // Enable norms for only 1 doc, post flush |
| for(int j=0;j<27;j++) { |
| Document doc = new Document(); |
| Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); |
| if (j != 26) { |
| f.setOmitNorms(true); |
| } |
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals(27, hits.length); |
| searcher.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| reader.close(); |
| |
| dir.close(); |
| } |
| |
| public void testHighFreqTerm() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, new IndexWriter.MaxFieldLength(100000000)); |
| writer.setRAMBufferSizeMB(0.01); |
| // Massive doc that has 128 K a's |
| StringBuilder b = new StringBuilder(1024*1024); |
| for(int i=0;i<4096;i++) { |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| } |
| Document doc = new Document(); |
| doc.add(new Field("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(1, reader.maxDoc()); |
| assertEquals(1, reader.numDocs()); |
| Term t = new Term("field", "a"); |
| assertEquals(1, reader.docFreq(t)); |
| TermDocs td = reader.termDocs(t); |
| td.next(); |
| assertEquals(128*1024, td.freq()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // Make sure that a Directory implementation that does |
| // not use LockFactory at all (ie overrides makeLock and |
| // implements its own private locking) works OK. This |
| // was raised on java-dev as loss of backwards |
| // compatibility. |
| public void testNullLockFactory() throws IOException { |
| |
| final class MyRAMDirectory extends RAMDirectory { |
| private LockFactory myLockFactory; |
| MyRAMDirectory() { |
| lockFactory = null; |
| myLockFactory = new SingleInstanceLockFactory(); |
| } |
| @Override |
| public Lock makeLock(String name) { |
| return myLockFactory.makeLock(name); |
| } |
| } |
| |
| Directory dir = new MyRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| writer.close(); |
| Term searchTerm = new Term("content", "aaa"); |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("did not get right number of hits", 100, hits.length); |
| searcher.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.close(); |
| |
| dir.close(); |
| } |
| |
| public void testFlushWithNoMerging() throws IOException { |
| Directory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| for(int i=0;i<19;i++) |
| writer.addDocument(doc); |
| writer.flush(false, true, true); |
| writer.close(); |
| SegmentInfos sis = new SegmentInfos(); |
| sis.read(dir); |
| // Since we flushed w/o allowing merging we should now |
| // have 10 segments |
| assertEquals(10, sis.size()); |
| } |
| |
| // Make sure we can flush segment w/ norms, then add |
| // empty doc (no norms) and flush |
| public void testEmptyDocAfterFlushingRealDoc() throws IOException { |
| Directory dir = new RAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.commit(); |
| writer.addDocument(new Document()); |
| writer.close(); |
| _TestUtil.checkIndex(dir); |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(2, reader.numDocs()); |
| } |
| |
| // Test calling optimize(false) whereby optimize is kicked |
| // off but we don't wait for it to finish (but |
| // writer.close() does wait) |
| public void testBackgroundOptimize() throws IOException { |
| |
| Directory dir = new MockRAMDirectory(); |
| for(int pass=0;pass<2;pass++) { |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMergeScheduler(new ConcurrentMergeScheduler()); |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(101); |
| for(int i=0;i<200;i++) |
| writer.addDocument(doc); |
| writer.optimize(false); |
| |
| if (0 == pass) { |
| writer.close(); |
| IndexReader reader = IndexReader.open(dir, true); |
| assertTrue(reader.isOptimized()); |
| reader.close(); |
| } else { |
| // Get another segment to flush so we can verify it is |
| // NOT included in the optimization |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertTrue(!reader.isOptimized()); |
| reader.close(); |
| |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| assertEquals(2, infos.size()); |
| } |
| } |
| |
| dir.close(); |
| } |
| |
| private void rmDir(File dir) { |
| File[] files = dir.listFiles(); |
| if (files != null) { |
| for (int i = 0; i < files.length; i++) { |
| files[i].delete(); |
| } |
| } |
| dir.delete(); |
| } |
| |
| /** |
| * Test that no NullPointerException is raised |
| * when adding one document with a single, empty field |
| * and term vectors enabled. |
| * @throws IOException |
| * |
| */ |
| public void testBadSegment() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter ir = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| Document document = new Document(); |
| document.add(new Field("tvtest", "", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| ir.addDocument(document); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1008 |
| public void testNoTermVectorAfterTermVector() throws IOException { |
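| // Mix docs with and without term vectors in the same field across segments, then optimize to merge them. |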
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); |
| Document document = new Document(); |
| document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| iw.addDocument(document); |
| document = new Document(); |
| document.add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.NO)); |
| iw.addDocument(document); |
| // Make first segment |
| iw.commit(); |
| |
| document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| iw.addDocument(document); |
| // Make 2nd segment |
| iw.commit(); |
| |
| iw.optimize(); |
| iw.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1010 |
| public void testNoTermVectorAfterTermVectorMerge() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); |
| Document document = new Document(); |
| document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| iw.addDocument(document); |
| iw.commit(); |
| |
| document = new Document(); |
| document.add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.NO)); |
| iw.addDocument(document); |
| // Make first segment |
| iw.commit(); |
| |
| iw.optimize(); |
| |
| document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| iw.addDocument(document); |
| // Make 2nd segment |
| iw.commit(); |
| iw.optimize(); |
| |
| iw.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1036 |
| public void testMaxThreadPriority() throws IOException { |
| int pri = Thread.currentThread().getPriority(); |
| try { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); |
| Document document = new Document(); |
| document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| iw.setMaxBufferedDocs(2); |
| iw.setMergeFactor(2); |
| Thread.currentThread().setPriority(Thread.MAX_PRIORITY); |
| for(int i=0;i<4;i++) |
| iw.addDocument(document); |
| iw.close(); |
| |
| } finally { |
| Thread.currentThread().setPriority(pri); |
| } |
| } |
| |
| // Just intercepts all merges & verifies that we are never |
| // merging a segment with >= 20 (maxMergeDocs) docs |
| private class MyMergeScheduler extends MergeScheduler { |
| @Override |
| synchronized public void merge(IndexWriter writer) |
| throws CorruptIndexException, IOException { |
| |
| while(true) { |
| MergePolicy.OneMerge merge = writer.getNextMerge(); |
| if (merge == null) |
| break; |
| for(int i=0;i<merge.segments.size();i++) |
| assert merge.segments.info(i).docCount < 20; |
| writer.merge(merge); |
| } |
| } |
| |
| @Override |
| public void close() {} |
| } |
| |
| // LUCENE-1013 |
| public void testSetMaxMergeDocs() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); |
| iw.setMergeScheduler(new MyMergeScheduler()); |
| iw.setMaxMergeDocs(20); |
| iw.setMaxBufferedDocs(2); |
| iw.setMergeFactor(2); |
| Document document = new Document(); |
| document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| for(int i=0;i<177;i++) |
| iw.addDocument(document); |
| iw.close(); |
| } |
| |
| // LUCENE-1072 |
| public void testExceptionFromTokenStream() throws IOException { |
| RAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new Analyzer() { |
| |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new TokenFilter(new StandardTokenizer(Version.LUCENE_CURRENT, reader)) { |
| private int count = 0; |
| |
| @Override |
| public boolean incrementToken() throws IOException { |
| if (count++ == 5) { |
| throw new IOException(); |
| } |
| return input.incrementToken(); |
| } |
| }; |
| } |
| |
| }, true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| Document doc = new Document(); |
| String contents = "aa bb cc dd ee ff gg hh ii jj kk"; |
| doc.add(new Field("content", contents, Field.Store.NO, |
| Field.Index.ANALYZED)); |
| try { |
| writer.addDocument(doc); |
| fail("did not hit expected exception"); |
| } catch (Exception e) { |
| } |
| |
| // Make sure we can add another normal document |
| doc = new Document(); |
| doc.add(new Field("content", "aa bb cc dd", Field.Store.NO, |
| Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| |
| // Make sure we can add another normal document |
| doc = new Document(); |
| doc.add(new Field("content", "aa bb cc dd", Field.Store.NO, |
| Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| |
| writer.close(); |
| IndexReader reader = IndexReader.open(dir, true); |
| final Term t = new Term("content", "aa"); |
| assertEquals(reader.docFreq(t), 3); |
| |
| // Make sure the doc that hit the exception was marked |
| // as deleted: |
| TermDocs tdocs = reader.termDocs(t); |
| int count = 0; |
| while(tdocs.next()) { |
| count++; |
| } |
| assertEquals(2, count); |
| |
| assertEquals(reader.docFreq(new Term("content", "gg")), 0); |
| reader.close(); |
| dir.close(); |
| } |
| |
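| // Failure hook that throws a single IOException from inside a flush: it |
| // only fires when FreqProxTermsWriter.appendPostings is on the stack |
| // beneath a doFlush, and only after 30 earlier qualifying calls have been |
| // let through, so the failure lands mid-flush. |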
| private static class FailOnlyOnFlush extends MockRAMDirectory.Failure { |
| boolean doFail = false; |
| int count; |
| |
| @Override |
| public void setDoFail() { |
| this.doFail = true; |
| } |
| @Override |
| public void clearDoFail() { |
| this.doFail = false; |
| } |
| |
| @Override |
| public void eval(MockRAMDirectory dir) throws IOException { |
| if (doFail) { |
| StackTraceElement[] trace = new Exception().getStackTrace(); |
| boolean sawAppend = false; |
| boolean sawFlush = false; |
| for (int i = 0; i < trace.length; i++) { |
| if ("org.apache.lucene.index.FreqProxTermsWriter".equals(trace[i].getClassName()) && "appendPostings".equals(trace[i].getMethodName())) |
| sawAppend = true; |
| if ("doFlush".equals(trace[i].getMethodName())) |
| sawFlush = true; |
| } |
| |
| if (sawAppend && sawFlush && count++ >= 30) { |
| doFail = false; |
| throw new IOException("now failing during flush"); |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1072: make sure an errant exception on flushing |
| // one segment only takes out those docs in that one flush |
| public void testDocumentsWriterAbort() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| FailOnlyOnFlush failure = new FailOnlyOnFlush(); |
| failure.setDoFail(); |
| dir.failOn(failure); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| Document doc = new Document(); |
| String contents = "aa bb cc dd ee ff gg hh ii jj kk"; |
| doc.add(new Field("content", contents, Field.Store.NO, |
| Field.Index.ANALYZED)); |
| boolean hitError = false; |
| for(int i=0;i<200;i++) { |
| try { |
| writer.addDocument(doc); |
| } catch (IOException ioe) { |
| // only one flush should fail: |
| assertFalse(hitError); |
| hitError = true; |
| } |
| } |
| assertTrue(hitError); |
| writer.close(); |
| IndexReader reader = IndexReader.open(dir, true); |
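| // 200 adds minus the docs lost in the single aborted flush |
| // (2, given maxBufferedDocs=2) leaves 198: |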
| assertEquals(198, reader.docFreq(new Term("content", "aa"))); |
| reader.close(); |
| } |
| |
| private class CrashingFilter extends TokenFilter { |
| String fieldName; |
| int count; |
| |
| public CrashingFilter(String fieldName, TokenStream input) { |
| super(input); |
| this.fieldName = fieldName; |
| } |
| |
| @Override |
| public boolean incrementToken() throws IOException { |
| if (this.fieldName.equals("crash") && count++ >= 4) |
| throw new IOException("I'm experiencing problems"); |
| return input.incrementToken(); |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| super.reset(); |
| count = 0; |
| } |
| } |
| |
| public void testDocumentsWriterExceptions() throws IOException { |
| Analyzer analyzer = new Analyzer() { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new CrashingFilter(fieldName, new WhitespaceTokenizer(reader)); |
| } |
| }; |
| |
| for(int i=0;i<2;i++) { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); |
| //writer.setInfoStream(System.out); |
| Document doc = new Document(); |
| doc.add(new Field("contents", "here are some contents", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| doc.add(new Field("crash", "this should crash after 4 terms", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| doc.add(new Field("other", "this will not get indexed", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| try { |
| writer.addDocument(doc); |
| fail("did not hit expected exception"); |
| } catch (IOException ioe) { |
| } |
| |
| if (0 == i) { |
| doc = new Document(); |
| doc.add(new Field("contents", "here are some contents", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
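| // Each pass adds 2 good docs, then the doc that hit the exception (still |
| // counted in maxDoc but marked deleted), plus 2 more good docs when i==0: |
| // 5 docs on pass 0, 3 on pass 1: |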
| int expected = 3+(1-i)*2; |
| assertEquals(expected, reader.docFreq(new Term("contents", "here"))); |
| assertEquals(expected, reader.maxDoc()); |
| int numDel = 0; |
| for(int j=0;j<reader.maxDoc();j++) { |
| if (reader.isDeleted(j)) |
| numDel++; |
| else { |
| reader.document(j); |
| reader.getTermFreqVectors(j); |
| } |
| } |
| reader.close(); |
| |
| assertEquals(1, numDel); |
| |
| writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| doc = new Document(); |
| doc.add(new Field("contents", "here are some contents", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| for(int j=0;j<17;j++) |
| writer.addDocument(doc); |
| writer.optimize(); |
| writer.close(); |
| |
| reader = IndexReader.open(dir, true); |
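| // 17 new docs were added and optimize() expunged the one deleted doc, so |
| // the totals are the previous counts minus 1 plus 17: |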
| expected = 19+(1-i)*2; |
| assertEquals(expected, reader.docFreq(new Term("contents", "here"))); |
| assertEquals(expected, reader.maxDoc()); |
| numDel = 0; |
| for(int j=0;j<reader.maxDoc();j++) { |
| if (reader.isDeleted(j)) |
| numDel++; |
| else { |
| reader.document(j); |
| reader.getTermFreqVectors(j); |
| } |
| } |
| reader.close(); |
| assertEquals(0, numDel); |
| |
| dir.close(); |
| } |
| } |
| |
| public void testDocumentsWriterExceptionThreads() throws Exception { |
| Analyzer analyzer = new Analyzer() { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new CrashingFilter(fieldName, new WhitespaceTokenizer(reader)); |
| } |
| }; |
| |
| final int NUM_THREAD = 3; |
| final int NUM_ITER = 100; |
| |
| for(int i=0;i<2;i++) { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| |
| { |
| final IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); |
| |
| final int finalI = i; |
| |
| Thread[] threads = new Thread[NUM_THREAD]; |
| for(int t=0;t<NUM_THREAD;t++) { |
| threads[t] = new Thread() { |
| @Override |
| public void run() { |
| try { |
| for(int iter=0;iter<NUM_ITER;iter++) { |
| Document doc = new Document(); |
| doc.add(new Field("contents", "here are some contents", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| doc.add(new Field("crash", "this should crash after 4 terms", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| doc.add(new Field("other", "this will not get indexed", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| try { |
| writer.addDocument(doc); |
| fail("did not hit expected exception"); |
| } catch (IOException ioe) { |
| } |
| |
| if (0 == finalI) { |
| doc = new Document(); |
| doc.add(new Field("contents", "here are some contents", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| } |
| } |
| } catch (Throwable t) { |
| synchronized(this) { |
| System.out.println(Thread.currentThread().getName() + ": ERROR: hit unexpected exception"); |
| t.printStackTrace(System.out); |
| } |
| fail(); |
| } |
| } |
| }; |
| threads[t].start(); |
| } |
| |
| for(int t=0;t<NUM_THREAD;t++) |
| threads[t].join(); |
| |
| writer.close(); |
| } |
| |
| IndexReader reader = IndexReader.open(dir, true); |
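| // Same per-iteration doc counts as the single-threaded test, scaled by |
| // the number of threads and iterations: |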
| int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER; |
| assertEquals(expected, reader.docFreq(new Term("contents", "here"))); |
| assertEquals(expected, reader.maxDoc()); |
| int numDel = 0; |
| for(int j=0;j<reader.maxDoc();j++) { |
| if (reader.isDeleted(j)) |
| numDel++; |
| else { |
| reader.document(j); |
| reader.getTermFreqVectors(j); |
| } |
| } |
| reader.close(); |
| |
| assertEquals(NUM_THREAD*NUM_ITER, numDel); |
| |
| IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(10); |
| Document doc = new Document(); |
| doc.add(new Field("contents", "here are some contents", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| for(int j=0;j<17;j++) |
| writer.addDocument(doc); |
| writer.optimize(); |
| writer.close(); |
| |
| reader = IndexReader.open(dir, true); |
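| // optimize() expunged the NUM_THREAD*NUM_ITER deleted docs, and 17 new |
| // docs were added: |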
| expected += 17-NUM_THREAD*NUM_ITER; |
| assertEquals(expected, reader.docFreq(new Term("contents", "here"))); |
| assertEquals(expected, reader.maxDoc()); |
| numDel = 0; |
| for(int j=0;j<reader.maxDoc();j++) { |
| if (reader.isDeleted(j)) |
| numDel++; |
| else { |
| reader.document(j); |
| reader.getTermFreqVectors(j); |
| } |
| } |
| reader.close(); |
| assertEquals(0, numDel); |
| |
| dir.close(); |
| } |
| } |
| |
| public void testVariableSchema() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| int delID = 0; |
| for(int i=0;i<20;i++) { |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(2); |
| writer.setUseCompoundFile(false); |
| Document doc = new Document(); |
| String contents = "aa bb cc dd ee ff gg hh ii jj kk"; |
| |
| if (i == 7) { |
| // Add empty docs here |
| doc.add(new Field("content3", "", Field.Store.NO, |
| Field.Index.ANALYZED)); |
| } else { |
| Field.Store storeVal; |
| if (i%2 == 0) { |
| doc.add(new Field("content4", contents, Field.Store.YES, |
| Field.Index.ANALYZED)); |
| storeVal = Field.Store.YES; |
| } else |
| storeVal = Field.Store.NO; |
| doc.add(new Field("content1", contents, storeVal, |
| Field.Index.ANALYZED)); |
| doc.add(new Field("content3", "", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| doc.add(new Field("content5", "", storeVal, |
| Field.Index.ANALYZED)); |
| } |
| |
| for(int j=0;j<4;j++) |
| writer.addDocument(doc); |
| |
| writer.close(); |
| IndexReader reader = IndexReader.open(dir, false); |
| reader.deleteDocument(delID++); |
| reader.close(); |
| |
| if (0 == i % 4) { |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.setUseCompoundFile(false); |
| writer.optimize(); |
| writer.close(); |
| } |
| } |
| } |
| |
| public void testNoWaitClose() throws Throwable { |
| RAMDirectory directory = new MockRAMDirectory(); |
| |
| final Document doc = new Document(); |
| Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); |
| doc.add(idField); |
| |
| for(int pass=0;pass<2;pass++) { |
| |
| IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| |
| //System.out.println("TEST: pass=" + pass + " cms=" + (pass >= 2)); |
| for(int iter=0;iter<10;iter++) { |
| //System.out.println("TEST: iter=" + iter); |
| MergeScheduler ms; |
| if (pass == 1) |
| ms = new ConcurrentMergeScheduler(); |
| else |
| ms = new SerialMergeScheduler(); |
| |
| writer.setMergeScheduler(ms); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(100); |
| |
| for(int j=0;j<199;j++) { |
| idField.setValue(Integer.toString(iter*201+j)); |
| writer.addDocument(doc); |
| } |
| |
| int delID = iter*199; |
| for(int j=0;j<20;j++) { |
| writer.deleteDocuments(new Term("id", Integer.toString(delID))); |
| delID += 5; |
| } |
| |
| // Force a bunch of merge threads to kick off so we |
| // stress out aborting them on close: |
| writer.setMergeFactor(2); |
| |
| final IndexWriter finalWriter = writer; |
| final ArrayList failure = new ArrayList(); |
| Thread t1 = new Thread() { |
| @Override |
| public void run() { |
| boolean done = false; |
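| // Keep adding docs until close() shuts the writer down under us: |
| // AlreadyClosedException (or a NullPointerException, which is tolerated |
| // here) means close() won the race; anything else is recorded as a failure. |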
| while(!done) { |
| for(int i=0;i<100;i++) { |
| try { |
| finalWriter.addDocument(doc); |
| } catch (AlreadyClosedException e) { |
| done = true; |
| break; |
| } catch (NullPointerException e) { |
| done = true; |
| break; |
| } catch (Throwable e) { |
| e.printStackTrace(System.out); |
| failure.add(e); |
| done = true; |
| break; |
| } |
| } |
| Thread.yield(); |
| } |
| |
| } |
| }; |
| |
| t1.start(); |
| |
| writer.close(false); |
| t1.join(); |
| |
| // Propagate any unexpected failure hit by the indexing thread: |
| if (failure.size() > 0) |
| throw (Throwable) failure.get(0); |
| |
| // Make sure reader can read |
| IndexReader reader = IndexReader.open(directory, true); |
| reader.close(); |
| |
| // Reopen |
| writer = new IndexWriter(directory, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); |
| } |
| writer.close(); |
| } |
| |
| directory.close(); |
| } |
| |
| // Used by test cases below |
| private class IndexerThread extends Thread { |
| |
| boolean diskFull; |
| Throwable error; |
| AlreadyClosedException ace; |
| IndexWriter writer; |
| boolean noErrors; |
| volatile int addCount; |
| |
| public IndexerThread(IndexWriter writer, boolean noErrors) { |
| this.writer = writer; |
| this.noErrors = noErrors; |
| } |
| |
| @Override |
| public void run() { |
| |
| final Document doc = new Document(); |
| doc.add(new Field("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| |
| int idUpto = 0; |
| int fullCount = 0; |
| final long stopTime = System.currentTimeMillis() + 200; |
| |
| do { |
| try { |
| writer.updateDocument(new Term("id", ""+(idUpto++)), doc); |
| addCount++; |
| } catch (IOException ioe) { |
| //System.out.println(Thread.currentThread().getName() + ": hit exc"); |
| //ioe.printStackTrace(System.out); |
| if (ioe.getMessage().startsWith("fake disk full at") || |
| ioe.getMessage().equals("now failing on purpose")) { |
| diskFull = true; |
| try { |
| Thread.sleep(1); |
| } catch (InterruptedException ie) { |
| throw new ThreadInterruptedException(ie); |
| } |
| if (fullCount++ >= 5) |
| break; |
| } else { |
| if (noErrors) { |
| System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected IOException:"); |
| ioe.printStackTrace(System.out); |
| error = ioe; |
| } |
| break; |
| } |
| } catch (Throwable t) { |
| //t.printStackTrace(System.out); |
| if (noErrors) { |
| System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected Throwable:"); |
| t.printStackTrace(System.out); |
| error = t; |
| } |
| break; |
| } |
| } while(System.currentTimeMillis() < stopTime); |
| } |
| } |
| |
| // LUCENE-1130: make sure we can close() even while |
| // threads are trying to add documents. Strictly |
| // speaking, this isn't valid use of Lucene's APIs, but we |
| // still want to be robust to this case: |
| public void testCloseWithThreads() throws Exception { |
| int NUM_THREADS = 3; |
| |
| for(int iter=0;iter<7;iter++) { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); |
| |
| // We expect AlreadyClosedException |
| cms.setSuppressExceptions(); |
| |
| writer.setMergeScheduler(cms); |
| writer.setMaxBufferedDocs(10); |
| writer.setMergeFactor(4); |
| |
| IndexerThread[] threads = new IndexerThread[NUM_THREADS]; |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i] = new IndexerThread(writer, false); |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i].start(); |
| |
| boolean done = false; |
| while(!done) { |
| Thread.sleep(100); |
| for(int i=0;i<NUM_THREADS;i++) |
| // only stop when at least one thread has added a doc |
| if (threads[i].addCount > 0) { |
| done = true; |
| break; |
| } |
| } |
| |
| writer.close(false); |
| |
| // Make sure threads that are adding docs are not hung: |
| for(int i=0;i<NUM_THREADS;i++) { |
| // Without fix for LUCENE-1130: one of the |
| // threads will hang |
| threads[i].join(); |
| if (threads[i].isAlive()) |
| fail("thread seems to be hung"); |
| } |
| |
| // Quick test to make sure index is not corrupt: |
| IndexReader reader = IndexReader.open(dir, true); |
| TermDocs tdocs = reader.termDocs(new Term("field", "aaa")); |
| int count = 0; |
| while(tdocs.next()) { |
| count++; |
| } |
| assertTrue(count > 0); |
| reader.close(); |
| |
| dir.close(); |
| } |
| } |
| |
| // LUCENE-1130: make sure immediate disk full on creating |
| // an IndexWriter (hit during DW.ThreadState.init()) is |
| // OK: |
| public void testImmediateDiskFull() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| dir.setMaxSizeInBytes(dir.getRecomputedActualSizeInBytes()); |
| writer.setMaxBufferedDocs(2); |
| final Document doc = new Document(); |
| doc.add(new Field("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| try { |
| writer.addDocument(doc); |
| fail("did not hit disk full"); |
| } catch (IOException ioe) { |
| } |
| // Without fix for LUCENE-1130: this call will hang: |
| try { |
| writer.addDocument(doc); |
| fail("did not hit disk full"); |
| } catch (IOException ioe) { |
| } |
| try { |
| writer.close(false); |
| fail("did not hit disk full"); |
| } catch (IOException ioe) { |
| } |
| |
| // Make sure once disk space is available again, we can |
| // cleanly close: |
| dir.setMaxSizeInBytes(0); |
| writer.close(false); |
| dir.close(); |
| } |
| |
| // LUCENE-1130: make sure immediate disk full on creating |
| // an IndexWriter (hit during DW.ThreadState.init()), with |
| // multiple threads, is OK: |
| public void testImmediateDiskFullWithThreads() throws Exception { |
| |
| int NUM_THREADS = 3; |
| |
| for(int iter=0;iter<10;iter++) { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); |
| // We expect disk full exceptions in the merge threads |
| cms.setSuppressExceptions(); |
| writer.setMergeScheduler(cms); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(4); |
| dir.setMaxSizeInBytes(4*1024+20*iter); |
| |
| IndexerThread[] threads = new IndexerThread[NUM_THREADS]; |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i] = new IndexerThread(writer, true); |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i].start(); |
| |
| for(int i=0;i<NUM_THREADS;i++) { |
| // Without fix for LUCENE-1130: one of the |
| // threads will hang |
| threads[i].join(); |
| assertTrue("hit unexpected Throwable", threads[i].error == null); |
| } |
| |
| // Make sure once disk space is available again, we can |
| // cleanly close: |
| dir.setMaxSizeInBytes(0); |
| writer.close(false); |
| dir.close(); |
| } |
| } |
| |
| // Throws IOException during FieldsWriter.flushDocument and during DocumentsWriter.abort |
| private static class FailOnlyOnAbortOrFlush extends MockRAMDirectory.Failure { |
| private boolean onlyOnce; |
| public FailOnlyOnAbortOrFlush(boolean onlyOnce) { |
| this.onlyOnce = onlyOnce; |
| } |
| @Override |
| public void eval(MockRAMDirectory dir) throws IOException { |
| if (doFail) { |
| StackTraceElement[] trace = new Exception().getStackTrace(); |
| for (int i = 0; i < trace.length; i++) { |
| if ("abort".equals(trace[i].getMethodName()) || |
| "flushDocument".equals(trace[i].getMethodName())) { |
| if (onlyOnce) |
| doFail = false; |
| //System.out.println(Thread.currentThread().getName() + ": now fail"); |
| //new Throwable().printStackTrace(System.out); |
| throw new IOException("now failing on purpose"); |
| } |
| } |
| } |
| } |
| } |
| |
| // Runs test, with one thread, using the specific failure |
| // to trigger an IOException |
| public void _testSingleThreadFailure(MockRAMDirectory.Failure failure) throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| writer.setMaxBufferedDocs(2); |
| final Document doc = new Document(); |
| doc.add(new Field("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| |
| for(int i=0;i<6;i++) |
| writer.addDocument(doc); |
| |
| dir.failOn(failure); |
| failure.setDoFail(); |
| try { |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.commit(); |
| fail("did not hit exception"); |
| } catch (IOException ioe) { |
| } |
| failure.clearDoFail(); |
| writer.addDocument(doc); |
| writer.close(false); |
| } |
| |
| // Runs test, with multiple threads, using the specific |
| // failure to trigger an IOException |
| public void _testMultipleThreadsFailure(MockRAMDirectory.Failure failure) throws Exception { |
| |
| int NUM_THREADS = 3; |
| |
| for(int iter=0;iter<2;iter++) { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); |
| // We expect disk full exceptions in the merge threads |
| cms.setSuppressExceptions(); |
| writer.setMergeScheduler(cms); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(4); |
| |
| IndexerThread[] threads = new IndexerThread[NUM_THREADS]; |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i] = new IndexerThread(writer, true); |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i].start(); |
| |
| Thread.sleep(10); |
| |
| dir.failOn(failure); |
| failure.setDoFail(); |
| |
| for(int i=0;i<NUM_THREADS;i++) { |
| threads[i].join(); |
| assertTrue("hit unexpected Throwable", threads[i].error == null); |
| } |
| |
| boolean success = false; |
| try { |
| writer.close(false); |
| success = true; |
| } catch (IOException ioe) { |
| failure.clearDoFail(); |
| writer.close(false); |
| } |
| |
| if (success) { |
| IndexReader reader = IndexReader.open(dir, true); |
| for(int j=0;j<reader.maxDoc();j++) { |
| if (!reader.isDeleted(j)) { |
| reader.document(j); |
| reader.getTermFreqVectors(j); |
| } |
| } |
| reader.close(); |
| } |
| |
| dir.close(); |
| } |
| } |
| |
| // LUCENE-1130: make sure initial IOException, and then 2nd |
| // IOException during rollback(), is OK: |
| public void testIOExceptionDuringAbort() throws IOException { |
| _testSingleThreadFailure(new FailOnlyOnAbortOrFlush(false)); |
| } |
| |
| // LUCENE-1130: make sure initial IOException, and then 2nd |
| // IOException during rollback(), is OK: |
| public void testIOExceptionDuringAbortOnlyOnce() throws IOException { |
| _testSingleThreadFailure(new FailOnlyOnAbortOrFlush(true)); |
| } |
| |
| // LUCENE-1130: make sure initial IOException, and then 2nd |
| // IOException during rollback(), with multiple threads, is OK: |
| public void testIOExceptionDuringAbortWithThreads() throws Exception { |
| _testMultipleThreadsFailure(new FailOnlyOnAbortOrFlush(false)); |
| } |
| |
| // LUCENE-1130: make sure initial IOException, and then 2nd |
| // IOException during rollback(), with multiple threads, is OK: |
| public void testIOExceptionDuringAbortWithThreadsOnlyOnce() throws Exception { |
| _testMultipleThreadsFailure(new FailOnlyOnAbortOrFlush(true)); |
| } |
| |
| // Throws IOException during DocumentsWriter.closeDocStore |
| private static class FailOnlyInCloseDocStore extends MockRAMDirectory.Failure { |
| private boolean onlyOnce; |
| public FailOnlyInCloseDocStore(boolean onlyOnce) { |
| this.onlyOnce = onlyOnce; |
| } |
| @Override |
| public void eval(MockRAMDirectory dir) throws IOException { |
| if (doFail) { |
| StackTraceElement[] trace = new Exception().getStackTrace(); |
| for (int i = 0; i < trace.length; i++) { |
| if ("closeDocStore".equals(trace[i].getMethodName())) { |
| if (onlyOnce) |
| doFail = false; |
| throw new IOException("now failing on purpose"); |
| } |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1130: test IOException in closeDocStore |
| public void testIOExceptionDuringCloseDocStore() throws IOException { |
| _testSingleThreadFailure(new FailOnlyInCloseDocStore(false)); |
| } |
| |
| // LUCENE-1130: test IOException in closeDocStore |
| public void testIOExceptionDuringCloseDocStoreOnlyOnce() throws IOException { |
| _testSingleThreadFailure(new FailOnlyInCloseDocStore(true)); |
| } |
| |
| // LUCENE-1130: test IOException in closeDocStore, with threads |
| public void testIOExceptionDuringCloseDocStoreWithThreads() throws Exception { |
| _testMultipleThreadsFailure(new FailOnlyInCloseDocStore(false)); |
| } |
| |
| // LUCENE-1130: test IOException in closeDocStore, with threads |
| public void testIOExceptionDuringCloseDocStoreWithThreadsOnlyOnce() throws Exception { |
| _testMultipleThreadsFailure(new FailOnlyInCloseDocStore(true)); |
| } |
| |
| // Throws IOException during DocumentsWriter.writeSegment |
| private static class FailOnlyInWriteSegment extends MockRAMDirectory.Failure { |
| private boolean onlyOnce; |
| public FailOnlyInWriteSegment(boolean onlyOnce) { |
| this.onlyOnce = onlyOnce; |
| } |
| @Override |
| public void eval(MockRAMDirectory dir) throws IOException { |
| if (doFail) { |
| StackTraceElement[] trace = new Exception().getStackTrace(); |
| for (int i = 0; i < trace.length; i++) { |
| if ("flush".equals(trace[i].getMethodName()) && "org.apache.lucene.index.DocFieldProcessor".equals(trace[i].getClassName())) { |
| if (onlyOnce) |
| doFail = false; |
| throw new IOException("now failing on purpose"); |
| } |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1130: test IOException in writeSegment |
| public void testIOExceptionDuringWriteSegment() throws IOException { |
| _testSingleThreadFailure(new FailOnlyInWriteSegment(false)); |
| } |
| |
| // LUCENE-1130: test IOException in writeSegment |
| public void testIOExceptionDuringWriteSegmentOnlyOnce() throws IOException { |
| _testSingleThreadFailure(new FailOnlyInWriteSegment(true)); |
| } |
| |
| // LUCENE-1130: test IOException in writeSegment, with threads |
| public void testIOExceptionDuringWriteSegmentWithThreads() throws Exception { |
| _testMultipleThreadsFailure(new FailOnlyInWriteSegment(false)); |
| } |
| |
| // LUCENE-1130: test IOException in writeSegment, with threads |
| public void testIOExceptionDuringWriteSegmentWithThreadsOnlyOnce() throws Exception { |
| _testMultipleThreadsFailure(new FailOnlyInWriteSegment(true)); |
| } |
| |
| // LUCENE-1084: test unlimited field length |
| public void testUnlimitedMaxFieldLength() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| |
| Document doc = new Document(); |
| StringBuilder b = new StringBuilder(); |
| for(int i=0;i<10000;i++) |
| b.append(" a"); |
| b.append(" x"); |
| doc.add(new Field("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| Term t = new Term("field", "x"); |
| assertEquals(1, reader.docFreq(t)); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1044: Simulate checksum error in segments_N |
| public void testSegmentsChecksumError() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = null; |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| // add 100 documents |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| |
| // close |
| writer.close(); |
| |
| long gen = SegmentInfos.getCurrentSegmentGeneration(dir); |
| assertTrue("segment generation should be > 1 but got " + gen, gen > 1); |
| |
| final String segmentsFileName = SegmentInfos.getCurrentSegmentFileName(dir); |
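| // Write a bogus segments_(N+1): copy all but the last byte of the current |
| // segments_N, then alter that byte so the newer file fails its checksum |
| // and SegmentInfos must fall back to the valid segments_N: |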
| IndexInput in = dir.openInput(segmentsFileName); |
| IndexOutput out = dir.createOutput(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1+gen)); |
| out.copyBytes(in, in.length()-1); |
| byte b = in.readByte(); |
| out.writeByte((byte) (1+b)); |
| out.close(); |
| in.close(); |
| |
| IndexReader reader = null; |
| try { |
| reader = IndexReader.open(dir, true); |
| } catch (IOException e) { |
| e.printStackTrace(System.out); |
| fail("segmentInfos failed to retry fallback to correct segments_N file"); |
| } |
| reader.close(); |
| } |
| |
| // LUCENE-1044: test writer.commit() when autoCommit=false |
| public void testForceCommit() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(5); |
| |
| for (int i = 0; i < 23; i++) |
| addDoc(writer); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| writer.commit(); |
| IndexReader reader2 = reader.reopen(); |
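| // The original reader is a point-in-time view and still sees no docs: |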
| assertEquals(0, reader.numDocs()); |
| assertEquals(23, reader2.numDocs()); |
| reader.close(); |
| |
| for (int i = 0; i < 17; i++) |
| addDoc(writer); |
| assertEquals(23, reader2.numDocs()); |
| reader2.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| writer.commit(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(40, reader.numDocs()); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // Throws IOException during MockRAMDirectory.sync |
| private static class FailOnlyInSync extends MockRAMDirectory.Failure { |
| boolean didFail; |
| @Override |
| public void eval(MockRAMDirectory dir) throws IOException { |
| if (doFail) { |
| StackTraceElement[] trace = new Exception().getStackTrace(); |
| for (int i = 0; i < trace.length; i++) { |
| if (doFail && "org.apache.lucene.store.MockRAMDirectory".equals(trace[i].getClassName()) && "sync".equals(trace[i].getMethodName())) { |
| didFail = true; |
| throw new IOException("now failing on purpose during sync"); |
| } |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1044: test exception during sync |
| public void testExceptionDuringSync() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| FailOnlyInSync failure = new FailOnlyInSync(); |
| dir.failOn(failure); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| failure.setDoFail(); |
| |
| ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); |
| writer.setMergeScheduler(cms); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(5); |
| |
| for (int i = 0; i < 23; i++) { |
| addDoc(writer); |
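| // Commit after every other added doc so several commits (and syncs) are |
| // attempted: |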
| if ((i-1)%2 == 0) { |
| try { |
| writer.commit(); |
| } catch (IOException ioe) { |
| // expected |
| } |
| } |
| } |
| |
| cms.sync(); |
| assertTrue(failure.didFail); |
| failure.clearDoFail(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1168 |
| public void testTermVectorCorruption() throws IOException { |
| |
| Directory dir = new MockRAMDirectory(); |
| for(int iter=0;iter<2;iter++) { |
| IndexWriter writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.UNLIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| writer.setMergeScheduler(new SerialMergeScheduler()); |
| writer.setMergePolicy(new LogDocMergePolicy()); |
| |
| Document document = new Document(); |
| |
| Field storedField = new Field("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| writer.addDocument(document); |
| writer.addDocument(document); |
| |
| document = new Document(); |
| document.add(storedField); |
| Field termVectorField = new Field("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| |
| document.add(termVectorField); |
| writer.addDocument(document); |
| writer.optimize(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| for(int i=0;i<reader.numDocs();i++) { |
| reader.document(i); |
| reader.getTermFreqVectors(i); |
| } |
| reader.close(); |
| |
| writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.UNLIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| writer.setMergeScheduler(new SerialMergeScheduler()); |
| writer.setMergePolicy(new LogDocMergePolicy()); |
| |
| Directory[] indexDirs = {new MockRAMDirectory(dir)}; |
| writer.addIndexesNoOptimize(indexDirs); |
| writer.optimize(); |
| writer.close(); |
| } |
| dir.close(); |
| } |
| |
| // LUCENE-1168 |
| public void testTermVectorCorruption2() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| for(int iter=0;iter<2;iter++) { |
| IndexWriter writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.UNLIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| writer.setMergeScheduler(new SerialMergeScheduler()); |
| writer.setMergePolicy(new LogDocMergePolicy()); |
| |
| Document document = new Document(); |
| |
| Field storedField = new Field("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| writer.addDocument(document); |
| writer.addDocument(document); |
| |
| document = new Document(); |
| document.add(storedField); |
| Field termVectorField = new Field("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| writer.addDocument(document); |
| writer.optimize(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertTrue(reader.getTermFreqVectors(0)==null); |
| assertTrue(reader.getTermFreqVectors(1)==null); |
| assertTrue(reader.getTermFreqVectors(2)!=null); |
| reader.close(); |
| } |
| dir.close(); |
| } |
| |
| // LUCENE-1168 |
| public void testTermVectorCorruption3() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| writer.setMergeScheduler(new SerialMergeScheduler()); |
| writer.setMergePolicy(new LogDocMergePolicy()); |
| |
| Document document = new Document(); |
| Field storedField = new Field("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| Field termVectorField = new Field("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<10;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| writer.setMergeScheduler(new SerialMergeScheduler()); |
| writer.setMergePolicy(new LogDocMergePolicy()); |
| for(int i=0;i<6;i++) |
| writer.addDocument(document); |
| |
| writer.optimize(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| for(int i=0;i<10;i++) { |
| reader.getTermFreqVectors(i); |
| reader.document(i); |
| } |
| reader.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1084: test user-specified field length |
| public void testUserSpecifiedMaxFieldLength() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new IndexWriter.MaxFieldLength(100000)); |
| |
| Document doc = new Document(); |
| StringBuilder b = new StringBuilder(); |
| for(int i=0;i<10000;i++) |
| b.append(" a"); |
| b.append(" x"); |
| doc.add(new Field("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| Term t = new Term("field", "x"); |
| assertEquals(1, reader.docFreq(t)); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-325: test expungeDeletes, when 2 singular merges |
| // are required |
| public void testExpungeDeletes() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| |
| Document document = new Document(); |
| Field storedField = new Field("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| Field termVectorField = new Field("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<10;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| IndexReader ir = IndexReader.open(dir, false); |
| assertEquals(10, ir.maxDoc()); |
| assertEquals(10, ir.numDocs()); |
| ir.deleteDocument(0); |
| ir.deleteDocument(7); |
| assertEquals(8, ir.numDocs()); |
| ir.close(); |
| |
| writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| assertEquals(8, writer.numDocs()); |
| assertEquals(10, writer.maxDoc()); |
| writer.expungeDeletes(); |
| assertEquals(8, writer.numDocs()); |
| writer.close(); |
| ir = IndexReader.open(dir, true); |
| assertEquals(8, ir.maxDoc()); |
| assertEquals(8, ir.numDocs()); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-325: test expungeDeletes, when many adjacent merges are required |
| public void testExpungeDeletes2() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(50); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| |
| Document document = new Document(); |
| Field storedField = new Field("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| Field termVectorField = new Field("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<98;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| IndexReader ir = IndexReader.open(dir, false); |
| assertEquals(98, ir.maxDoc()); |
| assertEquals(98, ir.numDocs()); |
| for(int i=0;i<98;i+=2) |
| ir.deleteDocument(i); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| |
| writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMergeFactor(3); |
| assertEquals(49, writer.numDocs()); |
| writer.expungeDeletes(); |
| writer.close(); |
| ir = IndexReader.open(dir, true); |
| assertEquals(49, ir.maxDoc()); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-325: test expungeDeletes without waiting, when |
| // many adjacent merges are required |
| public void testExpungeDeletes3() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(50); |
| writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); |
| |
| Document document = new Document(); |
| Field storedField = new Field("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| Field termVectorField = new Field("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<98;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| IndexReader ir = IndexReader.open(dir, false); |
| assertEquals(98, ir.maxDoc()); |
| assertEquals(98, ir.numDocs()); |
| for(int i=0;i<98;i+=2) |
| ir.deleteDocument(i); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| |
| writer = new IndexWriter(dir, |
| new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), |
| IndexWriter.MaxFieldLength.LIMITED); |
| // Force many merges to happen |
| writer.setMergeFactor(3); |
| writer.expungeDeletes(false); |
| writer.close(); |
| ir = IndexReader.open(dir, true); |
| assertEquals(49, ir.maxDoc()); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1179 |
| public void testEmptyFieldName() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("", "a b c", Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| writer.close(); |
| } |
| |
| // LUCENE-1198 |
| public class MockIndexWriter extends IndexWriter { |
| |
| public MockIndexWriter(Directory dir, Analyzer a, boolean create, MaxFieldLength mfl) throws IOException { |
| super(dir, a, create, mfl); |
| } |
| |
| boolean doFail; |
| |
| @Override |
| boolean testPoint(String name) { |
| if (doFail && name.equals("DocumentsWriter.ThreadState.init start")) |
| throw new RuntimeException("intentionally failing"); |
| return true; |
| } |
| } |
| |
| public void testExceptionDocumentsWriterInit() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| MockIndexWriter w = new MockIndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("field", "a field", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| w.doFail = true; |
| try { |
| w.addDocument(doc); |
| fail("did not hit exception"); |
| } catch (RuntimeException re) { |
| // expected |
| } |
| w.close(); |
| _TestUtil.checkIndex(dir); |
| dir.close(); |
| } |
| |
| // LUCENE-1208 |
| public void testExceptionJustBeforeFlush() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| MockIndexWriter w = new MockIndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| w.setMaxBufferedDocs(2); |
| Document doc = new Document(); |
| doc.add(new Field("field", "a field", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| |
| Analyzer analyzer = new Analyzer() { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new CrashingFilter(fieldName, new WhitespaceTokenizer(reader)); |
| } |
| }; |
| |
| Document crashDoc = new Document(); |
| crashDoc.add(new Field("crash", "do it on token 4", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| try { |
| w.addDocument(crashDoc, analyzer); |
| fail("did not hit expected exception"); |
| } catch (IOException ioe) { |
| // expected |
| } |
| w.addDocument(doc); |
| w.close(); |
| dir.close(); |
| } |
| |
| public class MockIndexWriter2 extends IndexWriter { |
| |
| public MockIndexWriter2(Directory dir, Analyzer a, boolean create, MaxFieldLength mfl) throws IOException { |
| super(dir, a, create, mfl); |
| } |
| |
| boolean doFail; |
| boolean failed; |
| |
| @Override |
| boolean testPoint(String name) { |
| if (doFail && name.equals("startMergeInit")) { |
| failed = true; |
| throw new RuntimeException("intentionally failing"); |
| } |
| return true; |
| } |
| } |
| |
| // LUCENE-1210 |
| public void testExceptionOnMergeInit() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| MockIndexWriter2 w = new MockIndexWriter2(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| w.setMaxBufferedDocs(2); |
| w.setMergeFactor(2); |
| w.doFail = true; |
| w.setMergeScheduler(new ConcurrentMergeScheduler()); |
| Document doc = new Document(); |
| doc.add(new Field("field", "a field", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| for(int i=0;i<10;i++) |
| try { |
| w.addDocument(doc); |
| } catch (RuntimeException re) { |
| break; |
| } |
| |
| ((ConcurrentMergeScheduler) w.getMergeScheduler()).sync(); |
| assertTrue(w.failed); |
| w.close(); |
| dir.close(); |
| } |
| |
| public class MockIndexWriter3 extends IndexWriter { |
| |
| public MockIndexWriter3(Directory dir, Analyzer a, boolean create, IndexWriter.MaxFieldLength mfl) throws IOException { |
| super(dir, a, create, mfl); |
| } |
| |
| boolean wasCalled; |
| |
| @Override |
| public void doAfterFlush() { |
| wasCalled = true; |
| } |
| } |
| |
| // LUCENE-1222 |
| public void testDoAfterFlush() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| MockIndexWriter3 w = new MockIndexWriter3(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("field", "a field", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| w.commit(); |
| assertTrue(w.wasCalled); |
| // Reset the flag so the next flush's callback is verified too: |
| w.wasCalled = false; |
| w.deleteDocuments(new Term("field", "field")); |
| w.commit(); |
| assertTrue(w.wasCalled); |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| assertEquals(1, ir.maxDoc()); |
| assertEquals(0, ir.numDocs()); |
| ir.close(); |
| |
| dir.close(); |
| } |
| |
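| // Throws twice while SegmentInfos.prepareCommit is on the stack: a |
| // RuntimeException when no file delete is in progress, and an IOException |
| // when one is. |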
| private static class FailOnlyInCommit extends MockRAMDirectory.Failure { |
| |
| boolean fail1, fail2; |
| |
| @Override |
| public void eval(MockRAMDirectory dir) throws IOException { |
| StackTraceElement[] trace = new Exception().getStackTrace(); |
| boolean isCommit = false; |
| boolean isDelete = false; |
| for (int i = 0; i < trace.length; i++) { |
| if ("org.apache.lucene.index.SegmentInfos".equals(trace[i].getClassName()) && "prepareCommit".equals(trace[i].getMethodName())) |
| isCommit = true; |
| if ("org.apache.lucene.store.MockRAMDirectory".equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName())) |
| isDelete = true; |
| } |
| |
| if (isCommit) { |
| if (!isDelete) { |
| fail1 = true; |
| throw new RuntimeException("now fail first"); |
| } else { |
| fail2 = true; |
| throw new IOException("now fail during delete"); |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1214 |
| public void testExceptionsDuringCommit() throws Throwable { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| FailOnlyInCommit failure = new FailOnlyInCommit(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("field", "a field", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| dir.failOn(failure); |
| try { |
| w.close(); |
| fail(); |
| } catch (IOException ioe) { |
| fail("expected only RuntimeException"); |
| } catch (RuntimeException re) { |
| // Expected |
| } |
| assertTrue(failure.fail1 && failure.fail2); |
| w.rollback(); |
| dir.close(); |
| } |
| |
| final String[] utf8Data = new String[] { |
| // unpaired low surrogate |
| "ab\udc17cd", "ab\ufffdcd", |
| "\udc17abcd", "\ufffdabcd", |
| "\udc17", "\ufffd", |
| "ab\udc17\udc17cd", "ab\ufffd\ufffdcd", |
| "\udc17\udc17abcd", "\ufffd\ufffdabcd", |
| "\udc17\udc17", "\ufffd\ufffd", |
| |
| // unpaired high surrogate |
| "ab\ud917cd", "ab\ufffdcd", |
| "\ud917abcd", "\ufffdabcd", |
| "\ud917", "\ufffd", |
| "ab\ud917\ud917cd", "ab\ufffd\ufffdcd", |
| "\ud917\ud917abcd", "\ufffd\ufffdabcd", |
| "\ud917\ud917", "\ufffd\ufffd", |
| |
| // backwards surrogates |
| "ab\udc17\ud917cd", "ab\ufffd\ufffdcd", |
| "\udc17\ud917abcd", "\ufffd\ufffdabcd", |
| "\udc17\ud917", "\ufffd\ufffd", |
| "ab\udc17\ud917\udc17\ud917cd", "ab\ufffd\ud917\udc17\ufffdcd", |
| "\udc17\ud917\udc17\ud917abcd", "\ufffd\ud917\udc17\ufffdabcd", |
| "\udc17\ud917\udc17\ud917", "\ufffd\ud917\udc17\ufffd" |
| }; |
| |
| // LUCENE-510 |
| public void testInvalidUTF16() throws Throwable { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| |
| final int count = utf8Data.length/2; |
| for(int i=0;i<count;i++) |
| doc.add(new Field("f" + i, utf8Data[2*i], Field.Store.YES, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| Document doc2 = ir.document(0); |
| for(int i=0;i<count;i++) { |
| assertEquals("field " + i + " was not indexed correctly", 1, ir.docFreq(new Term("f"+i, utf8Data[2*i+1]))); |
| assertEquals("field " + i + " is incorrect", utf8Data[2*i+1], doc2.getField("f"+i).stringValue()); |
| } |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-510 |
| public void testAllUnicodeChars() throws Throwable { |
| |
| UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); |
| UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); |
| char[] chars = new char[2]; |
| for(int ch=0;ch<0x0010FFFF;ch++) { |
| |
| if (ch == 0xd800) |
| // Skip invalid code points |
| ch = 0xe000; |
| |
| int len = 0; |
| if (ch <= 0xffff) { |
| chars[len++] = (char) ch; |
| } else { |
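| // Code points above the BMP become a UTF-16 surrogate pair: the high |
| // surrogate carries the top 10 bits of (ch - 0x10000), the low surrogate |
| // the bottom 10 bits. |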
| chars[len++] = (char) (((ch-0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START); |
| chars[len++] = (char) (((ch-0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START); |
| } |
| |
| UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); |
| |
| String s1 = new String(chars, 0, len); |
| String s2 = new String(utf8.result, 0, utf8.length, "UTF-8"); |
| assertEquals("codepoint " + ch, s1, s2); |
| |
| UnicodeUtil.UTF8toUTF16(utf8.result, 0, utf8.length, utf16); |
| assertEquals("codepoint " + ch, s1, new String(utf16.result, 0, utf16.length)); |
| |
| byte[] b = s1.getBytes("UTF-8"); |
| assertEquals(utf8.length, b.length); |
| for(int j=0;j<utf8.length;j++) |
| assertEquals(utf8.result[j], b[j]); |
| } |
| } |
| |
| Random r; |
| |
| private int nextInt(int lim) { |
| return r.nextInt(lim); |
| } |
| |
| private int nextInt(int start, int end) { |
| return start + nextInt(end-start); |
| } |
| |
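| // Fills buffer[offset..offset+count) with random UTF-16 text, occasionally |
| // injecting an unpaired surrogate; expected[] holds what a correct UTF-8 |
| // round trip should yield (U+FFFD for the illegal chars). Returns true if |
| // any illegal sequence was written. |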
| private boolean fillUnicode(char[] buffer, char[] expected, int offset, int count) { |
| final int len = offset + count; |
| boolean hasIllegal = false; |
| |
| if (offset > 0 && buffer[offset] >= 0xdc00 && buffer[offset] < 0xe000) |
| // Don't start in the middle of a valid surrogate pair |
| offset--; |
| |
| for(int i=offset;i<len;i++) { |
| int t = nextInt(6); |
| if (0 == t && i < len-1) { |
| // Make a surrogate pair |
| // High surrogate |
| expected[i] = buffer[i++] = (char) nextInt(0xd800, 0xdc00); |
| // Low surrogate |
| expected[i] = buffer[i] = (char) nextInt(0xdc00, 0xe000); |
| } else if (t <= 1) |
| expected[i] = buffer[i] = (char) nextInt(0x80); |
| else if (2 == t) |
| expected[i] = buffer[i] = (char) nextInt(0x80, 0x800); |
| else if (3 == t) |
| expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); |
| else if (4 == t) |
| expected[i] = buffer[i] = (char) nextInt(0xe000, 0xffff); |
| else if (5 == t && i < len-1) { |
| // Illegal unpaired surrogate |
| if (nextInt(10) == 7) { |
| if (r.nextBoolean()) |
| buffer[i] = (char) nextInt(0xd800, 0xdc00); |
| else |
| buffer[i] = (char) nextInt(0xdc00, 0xe000); |
| expected[i++] = 0xfffd; |
| expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); |
| hasIllegal = true; |
| } else |
| expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); |
| } else { |
| expected[i] = buffer[i] = ' '; |
| } |
| } |
| |
| return hasIllegal; |
| } |
| |
| // LUCENE-510 |
| public void testRandomUnicodeStrings() throws Throwable { |
| r = newRandom(); |
| |
| char[] buffer = new char[20]; |
| char[] expected = new char[20]; |
| |
| UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); |
| UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); |
| |
| for(int iter=0;iter<100000;iter++) { |
| boolean hasIllegal = fillUnicode(buffer, expected, 0, 20); |
| |
| UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8); |
| if (!hasIllegal) { |
| byte[] b = new String(buffer, 0, 20).getBytes("UTF-8"); |
| assertEquals(b.length, utf8.length); |
| for(int i=0;i<b.length;i++) |
| assertEquals(b[i], utf8.result[i]); |
| } |
| |
| UnicodeUtil.UTF8toUTF16(utf8.result, 0, utf8.length, utf16); |
| assertEquals(utf16.length, 20); |
| for(int i=0;i<20;i++) |
| assertEquals(expected[i], utf16.result[i]); |
| } |
| } |
| |
| // LUCENE-510 |
| public void testIncrementalUnicodeStrings() throws Throwable { |
| r = newRandom(); |
| char[] buffer = new char[20]; |
| char[] expected = new char[20]; |
| |
| UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); |
| UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); |
| UnicodeUtil.UTF16Result utf16a = new UnicodeUtil.UTF16Result(); |
| |
| boolean hasIllegal = false; |
| byte[] last = new byte[60]; |
| |
| for(int iter=0;iter<100000;iter++) { |
| |
| final int prefix; |
| |
| if (iter == 0 || hasIllegal) |
| prefix = 0; |
| else |
| prefix = nextInt(20); |
| |
| hasIllegal = fillUnicode(buffer, expected, prefix, 20-prefix); |
| |
| UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8); |
| if (!hasIllegal) { |
| byte[] b = new String(buffer, 0, 20).getBytes("UTF-8"); |
| assertEquals(b.length, utf8.length); |
| for(int i=0;i<b.length;i++) |
| assertEquals(b[i], utf8.result[i]); |
| } |
| |
| int bytePrefix = 20; |
| if (iter == 0 || hasIllegal) |
| bytePrefix = 0; |
| else |
| for(int i=0;i<20;i++) |
| if (last[i] != utf8.result[i]) { |
| bytePrefix = i; |
| break; |
| } |
| System.arraycopy(utf8.result, 0, last, 0, utf8.length); |
| |
| UnicodeUtil.UTF8toUTF16(utf8.result, bytePrefix, utf8.length-bytePrefix, utf16); |
| assertEquals(20, utf16.length); |
| for(int i=0;i<20;i++) |
| assertEquals(expected[i], utf16.result[i]); |
| |
| UnicodeUtil.UTF8toUTF16(utf8.result, 0, utf8.length, utf16a); |
| assertEquals(20, utf16a.length); |
| for(int i=0;i<20;i++) |
| assertEquals(expected[i], utf16a.result[i]); |
| } |
| } |
| |
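| // The test below builds a TokenStream whose very first token has a position |
| // increment of 0, the situation LUCENE-1255 guards against (it could otherwise |
| // produce a negative absolute position). The assertions verify the term is |
| // indexed at position 0 and that phrase and span queries still match. |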
| // LUCENE-1255 |
| public void testNegativePositions() throws Throwable { |
| final TokenStream tokens = new TokenStream() { |
| final TermAttribute termAtt = addAttribute(TermAttribute.class); |
| final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| |
| final Iterator tokens = Arrays.asList(new String[]{"a","b","c"}).iterator(); |
| boolean first = true; |
| |
| @Override |
| public boolean incrementToken() { |
| if (!tokens.hasNext()) return false; |
| clearAttributes(); |
| termAtt.setTermBuffer((String) tokens.next()); |
| posIncrAtt.setPositionIncrement(first ? 0 : 1); |
| first = false; |
| return true; |
| } |
| }; |
| |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("field", tokens)); |
| w.addDocument(doc); |
| w.commit(); |
| |
| IndexSearcher s = new IndexSearcher(dir, false); |
| PhraseQuery pq = new PhraseQuery(); |
| pq.add(new Term("field", "a")); |
| pq.add(new Term("field", "b")); |
| pq.add(new Term("field", "c")); |
| ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs; |
| assertEquals(1, hits.length); |
| |
| Query q = new SpanTermQuery(new Term("field", "a")); |
| hits = s.search(q, null, 1000).scoreDocs; |
| assertEquals(1, hits.length); |
| TermPositions tps = s.getIndexReader().termPositions(new Term("field", "a")); |
| assertTrue(tps.next()); |
| assertEquals(1, tps.freq()); |
| assertEquals(0, tps.nextPosition()); |
| w.close(); |
| |
| assertTrue(_TestUtil.checkIndex(dir)); |
| s.close(); |
| dir.close(); |
| } |
| |
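| // The LUCENE-1274 tests below exercise IndexWriter's two-phase commit. A |
| // minimal sketch of the pattern, using the same calls as the tests: |
| // |
| //   writer.prepareCommit();    // write a pending commit; not yet visible to readers |
| //   // ... coordinate with any other transactional resources ... |
| //   writer.commit();           // publish the pending commit |
| //   // or writer.rollback();   // discard pending changes (also closes the writer) |
| // |
| // Readers opened or reopened before commit() must keep seeing the old doc |
| // count, which is what the numDocs() assertions below verify. |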
| // LUCENE-1274: test writer.prepareCommit() |
| public void testPrepareCommit() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(5); |
| |
| for (int i = 0; i < 23; i++) |
| addDoc(writer); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| |
| writer.prepareCommit(); |
| |
| IndexReader reader2 = IndexReader.open(dir, true); |
| assertEquals(0, reader2.numDocs()); |
| |
| writer.commit(); |
| |
| IndexReader reader3 = reader.reopen(); |
| assertEquals(0, reader.numDocs()); |
| assertEquals(0, reader2.numDocs()); |
| assertEquals(23, reader3.numDocs()); |
| reader.close(); |
| reader2.close(); |
| |
| for (int i = 0; i < 17; i++) |
| addDoc(writer); |
| |
| assertEquals(23, reader3.numDocs()); |
| reader3.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| |
| writer.prepareCommit(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| |
| writer.commit(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(40, reader.numDocs()); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1274: test writer.prepareCommit() |
| public void testPrepareCommitRollback() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| dir.setPreventDoubleWrite(false); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| |
| writer.setMaxBufferedDocs(2); |
| writer.setMergeFactor(5); |
| |
| for (int i = 0; i < 23; i++) |
| addDoc(writer); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| |
| writer.prepareCommit(); |
| |
| IndexReader reader2 = IndexReader.open(dir, true); |
| assertEquals(0, reader2.numDocs()); |
| |
| writer.rollback(); |
| |
| IndexReader reader3 = reader.reopen(); |
| assertEquals(0, reader.numDocs()); |
| assertEquals(0, reader2.numDocs()); |
| assertEquals(0, reader3.numDocs()); |
| reader.close(); |
| reader2.close(); |
| |
| writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| for (int i = 0; i < 17; i++) |
| addDoc(writer); |
| |
| assertEquals(0, reader3.numDocs()); |
| reader3.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer.prepareCommit(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer.commit(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(17, reader.numDocs()); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1274 |
| public void testPrepareCommitNoChanges() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.prepareCommit(); |
| writer.commit(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
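| // Shared harness for the LUCENE-1335 addIndexes tests below: the constructor |
| // builds a small source index ("dir" with NUM_INIT_DOCS docs) and opens |
| // NUM_COPY readers on it; launchThreads() starts NUM_THREADS threads that each |
| // call doBody() in a loop with fresh MockRAMDirectory copies of that index, so |
| // subclasses can race addIndexes*/optimize/commit against close/rollback on |
| // writer2. |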
| private abstract static class RunAddIndexesThreads { |
| |
| Directory dir, dir2; |
| final static int NUM_INIT_DOCS = 17; |
| IndexWriter writer2; |
| final List failures = new ArrayList(); |
| volatile boolean didClose; |
| final IndexReader[] readers; |
| final int NUM_COPY; |
| final static int NUM_THREADS = 5; |
| final Thread[] threads = new Thread[NUM_THREADS]; |
| final ConcurrentMergeScheduler cms; |
| |
| public RunAddIndexesThreads(int numCopy) throws Throwable { |
| NUM_COPY = numCopy; |
| dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer.setMaxBufferedDocs(2); |
| for (int i = 0; i < NUM_INIT_DOCS; i++) |
| addDoc(writer); |
| writer.close(); |
| |
| dir2 = new MockRAMDirectory(); |
| writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| cms = (ConcurrentMergeScheduler) writer2.getMergeScheduler(); |
| |
| readers = new IndexReader[NUM_COPY]; |
| for(int i=0;i<NUM_COPY;i++) |
| readers[i] = IndexReader.open(dir, true); |
| } |
| |
| void launchThreads(final int numIter) { |
| |
| for(int i=0;i<NUM_THREADS;i++) { |
| threads[i] = new Thread() { |
| @Override |
| public void run() { |
| try { |
| |
| final Directory[] dirs = new Directory[NUM_COPY]; |
| for(int k=0;k<NUM_COPY;k++) |
| dirs[k] = new MockRAMDirectory(dir); |
| |
| int j=0; |
| |
| while(true) { |
| // System.out.println(Thread.currentThread().getName() + ": iter j=" + j); |
| if (numIter > 0 && j == numIter) |
| break; |
| doBody(j++, dirs); |
| } |
| } catch (Throwable t) { |
| handle(t); |
| } |
| } |
| }; |
| } |
| |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i].start(); |
| } |
| |
| void joinThreads() throws Exception { |
| for(int i=0;i<NUM_THREADS;i++) |
| threads[i].join(); |
| } |
| |
| void close(boolean doWait) throws Throwable { |
| didClose = true; |
| writer2.close(doWait); |
| } |
| |
| void closeDir() throws Throwable { |
| for(int i=0;i<NUM_COPY;i++) |
| readers[i].close(); |
| dir2.close(); |
| } |
| |
| abstract void doBody(int j, Directory[] dirs) throws Throwable; |
| abstract void handle(Throwable t); |
| } |
| |
| private class CommitAndAddIndexes extends RunAddIndexesThreads { |
| public CommitAndAddIndexes(int numCopy) throws Throwable { |
| super(numCopy); |
| } |
| |
| @Override |
| void handle(Throwable t) { |
| t.printStackTrace(System.out); |
| synchronized(failures) { |
| failures.add(t); |
| } |
| } |
| |
| @Override |
| void doBody(int j, Directory[] dirs) throws Throwable { |
| switch(j%4) { |
| case 0: |
| writer2.addIndexesNoOptimize(dirs); |
| writer2.optimize(); |
| break; |
| case 1: |
| writer2.addIndexesNoOptimize(dirs); |
| break; |
| case 2: |
| writer2.addIndexes(readers); |
| break; |
| case 3: |
| writer2.commit(); |
| } |
| } |
| } |
| |
| // LUCENE-1335: test simultaneous addIndexes & commits |
| // from multiple threads |
| public void testAddIndexesWithThreads() throws Throwable { |
| |
| final int NUM_ITER = 12; |
| final int NUM_COPY = 3; |
| CommitAndAddIndexes c = new CommitAndAddIndexes(NUM_COPY); |
| c.launchThreads(NUM_ITER); |
| |
| for(int i=0;i<100;i++) |
| addDoc(c.writer2); |
| |
| c.joinThreads(); |
| |
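| // Expected doc count: the 100 docs added above, plus the docs added by the |
| // threads. Per thread, 3 of every 4 iterations (cases 0-2 of doBody) each add |
| // NUM_COPY copies of the 17-doc source index: |
| // 5 threads * (3*12/4 = 9) iterations * 3 dirs * 17 docs = 2295, so 2395 total. |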
| assertEquals(100+NUM_COPY*(3*NUM_ITER/4)*c.NUM_THREADS*c.NUM_INIT_DOCS, c.writer2.numDocs()); |
| |
| c.close(true); |
| |
| assertTrue(c.failures.size() == 0); |
| |
| _TestUtil.checkIndex(c.dir2); |
| |
| IndexReader reader = IndexReader.open(c.dir2, true); |
| assertEquals(100+NUM_COPY*(3*NUM_ITER/4)*c.NUM_THREADS*c.NUM_INIT_DOCS, reader.numDocs()); |
| reader.close(); |
| |
| c.closeDir(); |
| } |
| |
| private class CommitAndAddIndexes2 extends CommitAndAddIndexes { |
| public CommitAndAddIndexes2(int numCopy) throws Throwable { |
| super(numCopy); |
| } |
| |
| @Override |
| void handle(Throwable t) { |
| if (!(t instanceof AlreadyClosedException) && !(t instanceof NullPointerException)) { |
| t.printStackTrace(System.out); |
| synchronized(failures) { |
| failures.add(t); |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1335: test simultaneous addIndexes & close |
| public void testAddIndexesWithClose() throws Throwable { |
| final int NUM_COPY = 3; |
| CommitAndAddIndexes2 c = new CommitAndAddIndexes2(NUM_COPY); |
| //c.writer2.setInfoStream(System.out); |
| c.launchThreads(-1); |
| |
| // Close w/o first stopping/joining the threads |
| c.close(true); |
| //c.writer2.close(); |
| |
| c.joinThreads(); |
| |
| _TestUtil.checkIndex(c.dir2); |
| |
| c.closeDir(); |
| |
| assertTrue(c.failures.size() == 0); |
| } |
| |
| private class CommitAndAddIndexes3 extends RunAddIndexesThreads { |
| public CommitAndAddIndexes3(int numCopy) throws Throwable { |
| super(numCopy); |
| } |
| |
| @Override |
| void doBody(int j, Directory[] dirs) throws Throwable { |
| switch(j%5) { |
| case 0: |
| writer2.addIndexesNoOptimize(dirs); |
| writer2.optimize(); |
| break; |
| case 1: |
| writer2.addIndexesNoOptimize(dirs); |
| break; |
| case 2: |
| writer2.addIndexes(readers); |
| break; |
| case 3: |
| writer2.optimize(); |
| // no break: falls through so the optimize is immediately followed by a commit |
| case 4: |
| writer2.commit(); |
| } |
| } |
| |
| @Override |
| void handle(Throwable t) { |
| boolean report = true; |
| |
| if (t instanceof AlreadyClosedException || t instanceof MergePolicy.MergeAbortedException || t instanceof NullPointerException) { |
| report = !didClose; |
| } else if (t instanceof IOException) { |
| Throwable t2 = t.getCause(); |
| if (t2 instanceof MergePolicy.MergeAbortedException) { |
| report = !didClose; |
| } |
| } |
| if (report) { |
| t.printStackTrace(System.out); |
| synchronized(failures) { |
| failures.add(t); |
| } |
| } |
| } |
| } |
| |
| // LUCENE-1335: test simultaneous addIndexes & close |
| public void testAddIndexesWithCloseNoWait() throws Throwable { |
| |
| final int NUM_COPY = 50; |
| CommitAndAddIndexes3 c = new CommitAndAddIndexes3(NUM_COPY); |
| c.launchThreads(-1); |
| |
| Thread.sleep(500); |
| |
| // Close w/o first stopping/joining the threads |
| c.close(false); |
| |
| c.joinThreads(); |
| |
| _TestUtil.checkIndex(c.dir2); |
| |
| c.closeDir(); |
| |
| assertTrue(c.failures.size() == 0); |
| } |
| |
| // LUCENE-1335: test simultaneous addIndexes & close |
| public void testAddIndexesWithRollback() throws Throwable { |
| |
| final int NUM_COPY = 50; |
| CommitAndAddIndexes3 c = new CommitAndAddIndexes3(NUM_COPY); |
| c.launchThreads(-1); |
| |
| Thread.sleep(500); |
| |
| // Close w/o first stopping/joining the threads |
| c.didClose = true; |
| c.writer2.rollback(); |
| |
| c.joinThreads(); |
| |
| _TestUtil.checkIndex(c.dir2); |
| |
| c.closeDir(); |
| |
| assertTrue(c.failures.size() == 0); |
| } |
| |
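| // MockIndexWriter4 below overrides IndexWriter's package-private testPoint() |
| // hook so that, while doFail is set, hitting the "rollback before checkpoint" |
| // test point throws a RuntimeException. testRollbackExceptionHang then checks |
| // that a rollback() which fails this way does not leave the writer in a state |
| // where a later rollback() hangs. |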
| // LUCENE-1347 |
| public class MockIndexWriter4 extends IndexWriter { |
| |
| public MockIndexWriter4(Directory dir, Analyzer a, boolean create, MaxFieldLength mfl) throws IOException { |
| super(dir, a, create, mfl); |
| } |
| |
| boolean doFail; |
| |
| @Override |
| boolean testPoint(String name) { |
| if (doFail && name.equals("rollback before checkpoint")) |
| throw new RuntimeException("intentionally failing"); |
| return true; |
| } |
| } |
| |
| // LUCENE-1347 |
| public void testRollbackExceptionHang() throws Throwable { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| MockIndexWriter4 w = new MockIndexWriter4(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| addDoc(w); |
| w.doFail = true; |
| try { |
| w.rollback(); |
| fail("did not hit intentional RuntimeException"); |
| } catch (RuntimeException re) { |
| // expected |
| } |
| |
| w.doFail = false; |
| w.rollback(); |
| } |
| |
| |
| // LUCENE-1219 |
| public void testBinaryFieldOffsetLength() throws IOException { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| byte[] b = new byte[50]; |
| for(int i=0;i<50;i++) |
| b[i] = (byte) (i+77); |
| |
| Document doc = new Document(); |
| Field f = new Field("binary", b, 10, 17, Field.Store.YES); |
| byte[] bx = f.getBinaryValue(); |
| assertTrue(bx != null); |
| assertEquals(50, bx.length); |
| assertEquals(10, f.getBinaryOffset()); |
| assertEquals(17, f.getBinaryLength()); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| doc = ir.document(0); |
| f = doc.getField("binary"); |
| b = f.getBinaryValue(); |
| assertTrue(b != null); |
| assertEquals(17, b.length); |
| assertEquals(87, b[0]); |
| ir.close(); |
| dir.close(); |
| } |
| |
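| // The next test covers the commit user data API: commit(Map) attaches |
| // arbitrary key/value metadata to a commit point, which can later be read back |
| // via the static IndexReader.getCommitUserData(Directory) or from an open |
| // reader via getCommitUserData(). |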
| // LUCENE-1382 |
| public void testCommitUserData() throws IOException { |
| Directory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| w.setMaxBufferedDocs(2); |
| for(int j=0;j<17;j++) |
| addDoc(w); |
| w.close(); |
| |
| assertEquals(0, IndexReader.getCommitUserData(dir).size()); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| // commit(Map) never called for this index |
| assertEquals(0, r.getCommitUserData().size()); |
| r.close(); |
| |
| w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| w.setMaxBufferedDocs(2); |
| for(int j=0;j<17;j++) |
| addDoc(w); |
| Map data = new HashMap(); |
| data.put("label", "test1"); |
| w.commit(data); |
| w.close(); |
| |
| assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); |
| |
| r = IndexReader.open(dir, true); |
| assertEquals("test1", r.getCommitUserData().get("label")); |
| r.close(); |
| |
| w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| w.optimize(); |
| w.close(); |
| |
| assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); |
| |
| dir.close(); |
| } |
| |
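| // testOptimizeExceptions relies on MockRAMDirectory's fault injection: |
| // setRandomIOExceptionRate() makes a fraction of I/O operations throw |
| // IOException, and the test only requires that optimize() either succeeds or |
| // fails with an IOException that carries a root cause. |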
| public void testOptimizeExceptions() throws IOException { |
| RAMDirectory startDir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(startDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| w.setMaxBufferedDocs(2); |
| w.setMergeFactor(100); |
| for(int i=0;i<27;i++) |
| addDoc(w); |
| w.close(); |
| |
| for(int i=0;i<200;i++) { |
| MockRAMDirectory dir = new MockRAMDirectory(startDir); |
| w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| ((ConcurrentMergeScheduler) w.getMergeScheduler()).setSuppressExceptions(); |
| dir.setRandomIOExceptionRate(0.5, 100); |
| try { |
| w.optimize(); |
| } catch (IOException ioe) { |
| if (ioe.getCause() == null) |
| fail("optimize threw IOException without root cause"); |
| } |
| w.close(); |
| dir.close(); |
| } |
| } |
| |
| // LUCENE-1429 |
| public void testOutOfMemoryErrorCausesCloseToFail() throws Exception { |
| |
| final List thrown = new ArrayList(); |
| |
| final IndexWriter writer = new IndexWriter(new MockRAMDirectory(), new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.UNLIMITED) { |
| @Override |
| public void message(final String message) { |
| if (message.startsWith("now flush at close") && 0 == thrown.size()) { |
| thrown.add(null); |
| throw new OutOfMemoryError("fake OOME at " + message); |
| } |
| } |
| }; |
| |
| // need to set an info stream so message is called |
| writer.setInfoStream(new PrintStream(new ByteArrayOutputStream())); |
| try { |
| writer.close(); |
| fail("OutOfMemoryError expected"); |
| } |
| catch (final OutOfMemoryError expected) {} |
| |
| // throws IllegalStateEx w/o bug fix |
| writer.close(); |
| } |
| |
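| // The LUCENE-1442 tests below check how term vector offsets accumulate when a |
| // document holds several Field instances with the same name: each later |
| // instance's offsets are shifted by the end offset of the text indexed before |
| // it (plus the analyzer's offset gap for tokenized fields), so the repeated |
| // "abcd" values land at 0-4, 4-8 and 8-12 and the empty value contributes an |
| // 8-8 token. |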
| // LUCENE-1442 |
| public void testDoubleOffsetCounting() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Field f = new Field("field", "abcd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f); |
| Field f2 = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f2); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); |
| |
| // Token "" occurred once |
| assertEquals(1, termOffsets.length); |
| assertEquals(8, termOffsets[0].getStartOffset()); |
| assertEquals(8, termOffsets[0].getEndOffset()); |
| |
| // Token "abcd" occurred three times |
| termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(1); |
| assertEquals(3, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| assertEquals(4, termOffsets[1].getStartOffset()); |
| assertEquals(8, termOffsets[1].getEndOffset()); |
| assertEquals(8, termOffsets[2].getStartOffset()); |
| assertEquals(12, termOffsets[2].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1442 |
| public void testDoubleOffsetCounting2() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Field f = new Field("field", "abcd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); |
| assertEquals(2, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| assertEquals(5, termOffsets[1].getStartOffset()); |
| assertEquals(9, termOffsets[1].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1448 |
| public void testEndOffsetPositionCharAnalyzer() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Field f = new Field("field", "abcd ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); |
| assertEquals(2, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| assertEquals(8, termOffsets[1].getStartOffset()); |
| assertEquals(12, termOffsets[1].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1448 |
| public void testEndOffsetPositionWithCachingTokenFilter() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| Analyzer analyzer = new WhitespaceAnalyzer(); |
| IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   "))); // 3 trailing spaces |
| Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); |
| assertEquals(2, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| assertEquals(8, termOffsets[1].getStartOffset()); |
| assertEquals(12, termOffsets[1].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
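| // TeeSinkTokenFilter (used below) lets a single analysis pass feed multiple |
| // fields: the tee itself is indexed as one field's token stream, while each |
| // sink from newSinkTokenStream() replays the same tokens for another field, so |
| // both "field" instances end up with identical tokens and offsets, as the |
| // assertions confirm. |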
| // LUCENE-1448 |
| public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| Analyzer analyzer = new WhitespaceAnalyzer(); |
| IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   "))); // 3 trailing spaces |
| TokenStream sink = tee.newSinkTokenStream(); |
| Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f1); |
| doc.add(f2); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); |
| assertEquals(2, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| assertEquals(8, termOffsets[1].getStartOffset()); |
| assertEquals(12, termOffsets[1].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1448 |
| public void testEndOffsetPositionStopFilter() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new StopAnalyzer(Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Field f = new Field("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); |
| assertEquals(2, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| assertEquals(9, termOffsets[1].getStartOffset()); |
| assertEquals(13, termOffsets[1].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1448 |
| public void testEndOffsetPositionStandard() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Field f = new Field("field", "abcd the ", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| Field f2 = new Field("field", "crunch man", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); |
| TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); |
| assertEquals(1, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| termOffsets = tpv.getOffsets(1); |
| assertEquals(11, termOffsets[0].getStartOffset()); |
| assertEquals(17, termOffsets[0].getEndOffset()); |
| termOffsets = tpv.getOffsets(2); |
| assertEquals(18, termOffsets[0].getStartOffset()); |
| assertEquals(21, termOffsets[0].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1448 |
| public void testEndOffsetPositionStandardEmptyField() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Field f = new Field("field", "", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| Field f2 = new Field("field", "crunch man", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); |
| TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); |
| assertEquals(1, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(6, termOffsets[0].getEndOffset()); |
| termOffsets = tpv.getOffsets(1); |
| assertEquals(7, termOffsets[0].getStartOffset()); |
| assertEquals(10, termOffsets[0].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1448 |
| public void testEndOffsetPositionStandardEmptyField2() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| |
| Field f = new Field("field", "abcd", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f); |
| doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| |
| Field f2 = new Field("field", "crunch", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); |
| doc.add(f2); |
| |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); |
| TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); |
| assertEquals(1, termOffsets.length); |
| assertEquals(0, termOffsets[0].getStartOffset()); |
| assertEquals(4, termOffsets[0].getEndOffset()); |
| termOffsets = tpv.getOffsets(1); |
| assertEquals(5, termOffsets[0].getStartOffset()); |
| assertEquals(11, termOffsets[0].getEndOffset()); |
| r.close(); |
| dir.close(); |
| } |
| |
| |
| // LUCENE-1468 -- make sure opening an IndexWriter with |
| // create=true does not remove non-index files |
| |
| public void testOtherFiles() throws Throwable { |
| File indexDir = new File(System.getProperty("tempDir"), "otherfiles"); |
| Directory dir = FSDirectory.open(indexDir); |
| try { |
| // Create my own random file: |
| |
| IndexOutput out = dir.createOutput("myrandomfile"); |
| out.writeByte((byte) 42); |
| out.close(); |
| |
| new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED).close(); |
| |
| assertTrue(dir.fileExists("myrandomfile")); |
| |
| // Make sure this does not copy myrandomfile: |
| Directory dir2 = new RAMDirectory(dir); |
| assertTrue(!dir2.fileExists("myrandomfile")); |
| |
| } finally { |
| dir.close(); |
| _TestUtil.rmDir(indexDir); |
| } |
| } |
| |
| public void testDeadlock() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| writer.setMaxBufferedDocs(2); |
| Document doc = new Document(); |
| doc.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.commit(); |
| // index has 2 segments |
| |
| MockRAMDirectory dir2 = new MockRAMDirectory(); |
| IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| writer2.addDocument(doc); |
| writer2.close(); |
| |
| IndexReader r1 = IndexReader.open(dir2, true); |
| IndexReader r2 = (IndexReader) r1.clone(); |
| writer.addIndexes(new IndexReader[] {r1, r2}); |
| writer.close(); |
| |
| IndexReader r3 = IndexReader.open(dir, true); |
| assertEquals(5, r3.numDocs()); |
| r3.close(); |
| |
| r1.close(); |
| r2.close(); |
| |
| dir2.close(); |
| dir.close(); |
| } |
| |
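| // IndexerThreadInterrupt repeatedly builds and verifies a small index and uses |
| // the allowInterrupt flag as a handshake with the main test thread: the main |
| // thread only calls interrupt() while the flag is true, and the worker expects |
| // every interrupt to surface as a ThreadInterruptedException whose cause is an |
| // InterruptedException, never as index corruption. |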
| private class IndexerThreadInterrupt extends Thread { |
| volatile boolean failed; |
| volatile boolean finish; |
| |
| boolean allowInterrupt = false; |
| |
| @Override |
| public void run() { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter w = null; |
| boolean first = true; |
| while(!finish) { |
| try { |
| |
| while(true) { |
| if (w != null) { |
| w.close(); |
| } |
| w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| |
| //((ConcurrentMergeScheduler) w.getMergeScheduler()).setSuppressExceptions(); |
| if (!first && !allowInterrupt) { |
| // tell main thread it can interrupt us any time, |
| // starting now |
| allowInterrupt = true; |
| } |
| |
| w.setMaxBufferedDocs(2); |
| w.setMergeFactor(2); |
| Document doc = new Document(); |
| doc.add(new Field("field", "some text contents", Field.Store.YES, Field.Index.ANALYZED)); |
| for(int i=0;i<100;i++) { |
| w.addDocument(doc); |
| w.commit(); |
| } |
| w.close(); |
| _TestUtil.checkIndex(dir); |
| IndexReader.open(dir, true).close(); |
| |
| if (first && !allowInterrupt) { |
| // Strangely, if we interrupt a thread before |
| // all classes are loaded, the class loader |
| // seems to do scary things with the interrupt |
| // status. In java 1.5, it'll throw an |
| // incorrect ClassNotFoundException. In java |
| // 1.6, it'll silently clear the interrupt. |
| // So, on first iteration through here we |
| // don't open ourselves up for interrupts |
| // until we've done the above loop. |
| allowInterrupt = true; |
| first = false; |
| } |
| } |
| } catch (ThreadInterruptedException re) { |
| Throwable e = re.getCause(); |
| assertTrue(e instanceof InterruptedException); |
| if (finish) { |
| break; |
| } |
| |
| // Make sure IW cleared the interrupted bit |
| // TODO: remove that false once test is fixed for real |
| if (false && interrupted()) { |
| System.out.println("FAILED; InterruptedException hit but thread.interrupted() was true"); |
| e.printStackTrace(System.out); |
| failed = true; |
| break; |
| } |
| |
| } catch (Throwable t) { |
| System.out.println("FAILED; unexpected exception"); |
| t.printStackTrace(System.out); |
| failed = true; |
| break; |
| } |
| } |
| |
| if (!failed) { |
| try { |
| _TestUtil.checkIndex(dir); |
| } catch (Exception e) { |
| failed = true; |
| System.out.println("CheckIndex FAILED: unexpected exception"); |
| e.printStackTrace(System.out); |
| } |
| try { |
| IndexReader r = IndexReader.open(dir, true); |
| //System.out.println("doc count=" + r.numDocs()); |
| r.close(); |
| } catch (Exception e) { |
| failed = true; |
| System.out.println("IndexReader.open FAILED: unexpected exception"); |
| e.printStackTrace(System.out); |
| } |
| } |
| } |
| } |
| |
| public void testThreadInterruptDeadlock() throws Exception { |
| IndexerThreadInterrupt t = new IndexerThreadInterrupt(); |
| t.setDaemon(true); |
| t.start(); |
| |
| // issue 100 interrupts to child thread |
| int i = 0; |
| while(i < 100) { |
| Thread.sleep(1); |
| |
| if (t.allowInterrupt) { |
| i++; |
| t.allowInterrupt = false; |
| t.interrupt(); |
| } |
| if (!t.isAlive()) { |
| break; |
| } |
| } |
| t.allowInterrupt = false; |
| t.finish = true; |
| t.interrupt(); |
| t.join(); |
| assertFalse(t.failed); |
| } |
| |
| |
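| // The next test combines a stored binary value with an indexed token stream on |
| // the same Field: setTokenStream() supplies the tokens to index, while the |
| // byte[] passed to the constructor is what gets stored, so the assertions |
| // check both the stored bytes and that the custom terms ("doc1field1" etc.) |
| // are searchable. |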
| public void testIndexStoreCombos() throws Exception { |
| MockRAMDirectory dir = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); |
| byte[] b = new byte[50]; |
| for(int i=0;i<50;i++) |
| b[i] = (byte) (i+77); |
| |
| Document doc = new Document(); |
| Field f = new Field("binary", b, 10, 17, Field.Store.YES); |
| f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc1field1"))); |
| Field f2 = new Field("string", "value", Field.Store.YES,Field.Index.ANALYZED); |
| f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc1field2"))); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| |
| // add a second doc so the RAM buffer holds 2 docs (exercises in-memory merging) |
| f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc2field1"))); |
| f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc2field2"))); |
| w.addDocument(doc); |
| |
| // force segment flush so we can force a segment merge with doc3 later. |
| w.commit(); |
| |
| f.setTokenStream(new WhitespaceTokenizer(new StringReader("doc3field1"))); |
| f2.setTokenStream(new WhitespaceTokenizer(new StringReader("doc3field2"))); |
| |
| w.addDocument(doc); |
| w.commit(); |
| w.optimize(); // force segment merge. |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| doc = ir.document(0); |
| f = doc.getField("binary"); |
| b = f.getBinaryValue(); |
| assertTrue(b != null); |
| assertEquals(17, b.length); |
| assertEquals(87, b[0]); |
| |
| assertTrue(ir.document(0).getFieldable("binary").isBinary()); |
| assertTrue(ir.document(1).getFieldable("binary").isBinary()); |
| assertTrue(ir.document(2).getFieldable("binary").isBinary()); |
| |
| assertEquals("value", ir.document(0).get("string")); |
| assertEquals("value", ir.document(1).get("string")); |
| assertEquals("value", ir.document(2).get("string")); |
| |
| |
| // test that the terms were indexed. |
| assertTrue(ir.termDocs(new Term("binary","doc1field1")).next()); |
| assertTrue(ir.termDocs(new Term("binary","doc2field1")).next()); |
| assertTrue(ir.termDocs(new Term("binary","doc3field1")).next()); |
| assertTrue(ir.termDocs(new Term("string","doc1field2")).next()); |
| assertTrue(ir.termDocs(new Term("string","doc2field2")).next()); |
| assertTrue(ir.termDocs(new Term("string","doc3field2")).next()); |
| |
| ir.close(); |
| dir.close(); |
| |
| } |
| |
| // LUCENE-1727: make sure doc fields are stored in order |
| public void testStoredFieldsOrder() throws Throwable { |
| Directory d = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("zzz", "a b c", Field.Store.YES, Field.Index.NO)); |
| doc.add(new Field("aaa", "a b c", Field.Store.YES, Field.Index.NO)); |
| doc.add(new Field("zzz", "1 2 3", Field.Store.YES, Field.Index.NO)); |
| w.addDocument(doc); |
| IndexReader r = w.getReader(); |
| doc = r.document(0); |
| Iterator it = doc.getFields().iterator(); |
| assertTrue(it.hasNext()); |
| Field f = (Field) it.next(); |
| assertEquals(f.name(), "zzz"); |
| assertEquals(f.stringValue(), "a b c"); |
| |
| assertTrue(it.hasNext()); |
| f = (Field) it.next(); |
| assertEquals(f.name(), "aaa"); |
| assertEquals(f.stringValue(), "a b c"); |
| |
| assertTrue(it.hasNext()); |
| f = (Field) it.next(); |
| assertEquals(f.name(), "zzz"); |
| assertEquals(f.stringValue(), "1 2 3"); |
| assertFalse(it.hasNext()); |
| r.close(); |
| w.close(); |
| d.close(); |
| } |
| |
| public void testEmbeddedFFFF() throws Throwable { |
| |
| Directory d = new MockRAMDirectory(); |
| IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| Document doc = new Document(); |
| doc.add(new Field("field", "a a\uffffb", Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| doc = new Document(); |
| doc.add(new Field("field", "a", Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| w.close(); |
| |
| _TestUtil.checkIndex(d); |
| d.close(); |
| } |
| |
| public void testNoDocsIndex() throws Throwable { |
| Directory dir = new MockRAMDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); |
| writer.setUseCompoundFile(false); |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); |
| writer.setInfoStream(new PrintStream(bos)); |
| writer.addDocument(new Document()); |
| writer.close(); |
| |
| _TestUtil.checkIndex(dir); |
| dir.close(); |
| } |
| } |