| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Random; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.analysis.MockFixedLengthPayloadFilter; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Index; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.Field.TermVector; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.Fieldable; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.FieldCache; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.PhraseQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.Lock; |
| import org.apache.lucene.store.LockFactory; |
| import org.apache.lucene.store.MockDirectoryWrapper; |
| import org.apache.lucene.store.NoLockFactory; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.store.SingleInstanceLockFactory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CharsRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.ThreadInterruptedException; |
| import org.apache.lucene.util.UnicodeUtil; |
| import org.apache.lucene.util._TestUtil; |
| |
| public class TestIndexWriter extends LuceneTestCase { |
| |
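| // Verifies maxDoc/numDocs bookkeeping across adds, reader-side deletes, |
| // optimize, and re-creating the index with OpenMode.CREATE. |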
| public void testDocCount() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = null; |
| IndexReader reader = null; |
| int i; |
| |
| long savedWriteLockTimeout = IndexWriterConfig.getDefaultWriteLockTimeout(); |
| try { |
| IndexWriterConfig.setDefaultWriteLockTimeout(2000); |
| assertEquals(2000, IndexWriterConfig.getDefaultWriteLockTimeout()); |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| } finally { |
| IndexWriterConfig.setDefaultWriteLockTimeout(savedWriteLockTimeout); |
| } |
| |
| // add 100 documents |
| for (i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| assertEquals(100, writer.maxDoc()); |
| writer.close(); |
| |
| // delete 40 documents |
| reader = IndexReader.open(dir, false); |
| for (i = 0; i < 40; i++) { |
| reader.deleteDocument(i); |
| } |
| reader.close(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(60, reader.numDocs()); |
| reader.close(); |
| |
| // optimize the index and check that the new doc count is correct |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| assertEquals(60, writer.numDocs()); |
| writer.optimize(); |
| assertEquals(60, writer.maxDoc()); |
| assertEquals(60, writer.numDocs()); |
| writer.close(); |
| |
| // check that the index reader gives the same numbers. |
| reader = IndexReader.open(dir, true); |
| assertEquals(60, reader.maxDoc()); |
| assertEquals(60, reader.numDocs()); |
| reader.close(); |
| |
| // make sure opening a new index for create over |
| // this existing one works correctly: |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); |
| assertEquals(0, writer.maxDoc()); |
| assertEquals(0, writer.numDocs()); |
| writer.close(); |
| dir.close(); |
| } |
| |
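| // Adds a single document with one analyzed, unstored "content" field. |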
| private void addDoc(IndexWriter writer) throws IOException |
| { |
| Document doc = new Document(); |
| doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
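| // Adds a document whose stored "content" and "id" fields embed the given index value. |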
| private void addDocWithIndex(IndexWriter writer, int index) throws IOException |
| { |
| Document doc = new Document(); |
| doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| |
| |
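| // Opens and rolls back a fresh writer, then fails if the set of files in |
| // the directory changed, i.e. if any unreferenced files were left behind. |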
| public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { |
| String[] startFiles = dir.listAll(); |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).rollback(); |
| String[] endFiles = dir.listAll(); |
| |
| Arrays.sort(startFiles); |
| Arrays.sort(endFiles); |
| |
| if (!Arrays.equals(startFiles, endFiles)) { |
| fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles)); |
| } |
| } |
| |
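| // Build indexes of varying sizes and verify optimize(3) leaves at most |
| // three segments (or fewer, when the index started with fewer). |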
| public void testOptimizeMaxNumSegments() throws IOException { |
| |
| MockDirectoryWrapper dir = newDirectory(); |
| |
| final Document doc = new Document(); |
| doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| final int incrMin = TEST_NIGHTLY ? 15 : 40; |
| for(int numDocs=10;numDocs<500;numDocs += _TestUtil.nextInt(random, incrMin, 5*incrMin)) { |
| LogDocMergePolicy ldmp = new LogDocMergePolicy(); |
| ldmp.setMinMergeDocs(1); |
| ldmp.setMergeFactor(5); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy( |
| ldmp)); |
| for(int j=0;j<numDocs;j++) |
| writer.addDocument(doc); |
| writer.close(); |
| |
| SegmentInfos sis = new SegmentInfos(); |
| sis.read(dir); |
| final int segCount = sis.size(); |
| |
| ldmp = new LogDocMergePolicy(); |
| ldmp.setMergeFactor(5); |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, |
| new MockAnalyzer(random)).setMergePolicy(ldmp)); |
| writer.optimize(3); |
| writer.close(); |
| |
| sis = new SegmentInfos(); |
| sis.read(dir); |
| final int optSegCount = sis.size(); |
| |
| if (segCount < 3) |
| assertEquals(segCount, optSegCount); |
| else |
| assertEquals(3, optSegCount); |
| } |
| dir.close(); |
| } |
| |
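| // Like testOptimizeMaxNumSegments, but keeps one writer open across |
| // iterations and uses a ConcurrentMergeScheduler with optimize(7). |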
| public void testOptimizeMaxNumSegments2() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| |
| final Document doc = new Document(); |
| doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| |
| LogDocMergePolicy ldmp = new LogDocMergePolicy(); |
| ldmp.setMinMergeDocs(1); |
| ldmp.setMergeFactor(4); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setMaxBufferedDocs(2).setMergePolicy(ldmp).setMergeScheduler(new ConcurrentMergeScheduler())); |
| |
| for(int iter=0;iter<10;iter++) { |
| for(int i=0;i<19;i++) |
| writer.addDocument(doc); |
| |
| writer.commit(); |
| writer.waitForMerges(); |
| writer.commit(); |
| |
| SegmentInfos sis = new SegmentInfos(); |
| sis.read(dir); |
| |
| final int segCount = sis.size(); |
| |
| writer.optimize(7); |
| writer.commit(); |
| writer.waitForMerges(); |
| |
| sis = new SegmentInfos(); |
| sis.read(dir); |
| final int optSegCount = sis.size(); |
| |
| if (segCount < 7) |
| assertEquals(segCount, optSegCount); |
| else |
| assertEquals(7, optSegCount); |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| /** |
| * Make sure optimize doesn't use more than a small |
| * multiple (the test allows up to 4X) of the starting |
| * index size as its temporary free space required. |
| */ |
| public void testOptimizeTempSpaceUsage() throws IOException { |
| |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10).setMergePolicy(newLogMergePolicy())); |
| if (VERBOSE) { |
| System.out.println("TEST: config1=" + writer.getConfig()); |
| } |
| |
| for(int j=0;j<500;j++) { |
| addDocWithIndex(writer, j); |
| } |
| final int termIndexInterval = writer.getConfig().getTermIndexInterval(); |
| // force one extra segment w/ different doc store so |
| // we see the doc stores get merged |
| writer.commit(); |
| addDocWithIndex(writer, 500); |
| writer.close(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: start disk usage"); |
| } |
| long startDiskUsage = 0; |
| String[] files = dir.listAll(); |
| for(int i=0;i<files.length;i++) { |
| startDiskUsage += dir.fileLength(files[i]); |
| if (VERBOSE) { |
| System.out.println(files[i] + ": " + dir.fileLength(files[i])); |
| } |
| } |
| |
| dir.resetMaxUsedSizeInBytes(); |
| dir.setTrackDiskUsage(true); |
| |
| // Important to use the same term index interval, else a |
| // smaller one here could increase the disk usage and |
| // cause a false failure: |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval).setMergePolicy(newLogMergePolicy())); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| writer.optimize(); |
| writer.close(); |
| long maxDiskUsage = dir.getMaxUsedSizeInBytes(); |
| assertTrue("optimize used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (4*startDiskUsage) + " (= 4X starting usage)", |
| maxDiskUsage <= 4*startDiskUsage); |
| dir.close(); |
| } |
| |
| static String arrayToString(String[] l) { |
| StringBuilder sb = new StringBuilder(); |
| for(int i=0;i<l.length;i++) { |
| if (i > 0) { |
| sb.append("\n "); |
| } |
| sb.append(l[i]); |
| } |
| return sb.toString(); |
| } |
| |
| // Make sure we can open an index for create even when a |
| // reader holds it open (this fails pre lock-less |
| // commits on windows): |
| public void testCreateWithReader() throws IOException { |
| Directory dir = newDirectory(); |
| |
| // add one document & close writer |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| addDoc(writer); |
| writer.close(); |
| |
| // now open reader: |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals("should be one document", reader.numDocs(), 1); |
| |
| // now open index for create: |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); |
| assertEquals("should be zero documents", writer.maxDoc(), 0); |
| addDoc(writer); |
| writer.close(); |
| |
| assertEquals("should be one document", reader.numDocs(), 1); |
| IndexReader reader2 = IndexReader.open(dir, true); |
| assertEquals("should be one document", reader2.numDocs(), 1); |
| reader.close(); |
| reader2.close(); |
| |
| dir.close(); |
| } |
| |
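| // Adding a document to a closed writer must throw AlreadyClosedException. |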
| public void testChangesAfterClose() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = null; |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| addDoc(writer); |
| |
| // close |
| writer.close(); |
| try { |
| addDoc(writer); |
| fail("did not hit AlreadyClosedException"); |
| } catch (AlreadyClosedException e) { |
| // expected |
| } |
| dir.close(); |
| } |
| |
| /* |
| * Simple test for "commit on close": open writer then |
| * add a bunch of docs, making sure reader does not see |
| * these docs until writer is closed. |
| */ |
| public void testCommitOnClose() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| for (int i = 0; i < 14; i++) { |
| addDoc(writer); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("content", "aaa"); |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("first number of hits", 14, hits.length); |
| searcher.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| for(int i=0;i<3;i++) { |
| for(int j=0;j<11;j++) { |
| addDoc(writer); |
| } |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader incorrectly sees changes from writer", 14, hits.length); |
| searcher.close(); |
| assertTrue("reader should have still been current", reader.isCurrent()); |
| } |
| |
| // Now, close the writer: |
| writer.close(); |
| assertFalse("reader should not be current now", reader.isCurrent()); |
| |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader did not see changes after writer was closed", 47, hits.length); |
| searcher.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| /* |
| * Simple test for "commit on close": open writer, then |
| * add a bunch of docs, making sure reader does not see |
| * them until writer has closed. Then instead of |
| * closing the writer, call rollback() and verify reader sees |
| * nothing was added. Then verify we can open the index |
| * and add docs to it. |
| */ |
| public void testCommitOnCloseAbort() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); |
| for (int i = 0; i < 14; i++) { |
| addDoc(writer); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("content", "aaa"); |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("first number of hits", 14, hits.length); |
| searcher.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10)); |
| for(int j=0;j<17;j++) { |
| addDoc(writer); |
| } |
| // Delete all docs: |
| writer.deleteDocuments(searchTerm); |
| |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader incorrectly sees changes from writer", 14, hits.length); |
| searcher.close(); |
| |
| // Now, rollback the writer: |
| writer.rollback(); |
| |
| assertNoUnreferencedFiles(dir, "unreferenced files remain after rollback()"); |
| |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("saw changes after writer.abort", 14, hits.length); |
| searcher.close(); |
| |
| // Now make sure we can re-open the index, add docs, |
| // and all is good: |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10)); |
| |
| // On abort, writer in fact may write to the same |
| // segments_N file: |
| dir.setPreventDoubleWrite(false); |
| |
| for(int i=0;i<12;i++) { |
| for(int j=0;j<17;j++) { |
| addDoc(writer); |
| } |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("reader incorrectly sees changes from writer", 14, hits.length); |
| searcher.close(); |
| } |
| |
| writer.close(); |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("didn't see changes after close", 218, hits.length); |
| searcher.close(); |
| |
| dir.close(); |
| } |
| |
| /* |
| * Verify that a writer with "commit on close" indeed |
| * cleans up the temp segments created after opening |
| * that are not referenced by the starting segments |
| * file. We check this by using MockDirectoryWrapper to |
| * measure max temp disk space used. |
| */ |
| public void testCommitOnCloseDiskUsage() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| Analyzer analyzer; |
| if (random.nextBoolean()) { |
| // no payloads |
| analyzer = new Analyzer() { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); |
| } |
| }; |
| } else { |
| // fixed length payloads |
| final int length = random.nextInt(200); |
| analyzer = new Analyzer() { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new MockFixedLengthPayloadFilter(random, |
| new MockTokenizer(reader, MockTokenizer.WHITESPACE, true), |
| length); |
| } |
| }; |
| } |
| |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer). |
| setMaxBufferedDocs(10). |
| setReaderPooling(false). |
| setMergePolicy(newLogMergePolicy(10)) |
| ); |
| for(int j=0;j<30;j++) { |
| addDocWithIndex(writer, j); |
| } |
| writer.close(); |
| dir.resetMaxUsedSizeInBytes(); |
| |
| dir.setTrackDiskUsage(true); |
| long startDiskUsage = dir.getMaxUsedSizeInBytes(); |
| writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer) |
| .setOpenMode(OpenMode.APPEND). |
| setMaxBufferedDocs(10). |
| setMergeScheduler(new SerialMergeScheduler()). |
| setReaderPooling(false). |
| setMergePolicy(newLogMergePolicy(10)) |
| |
| ); |
| for(int j=0;j<1470;j++) { |
| addDocWithIndex(writer, j); |
| } |
| long midDiskUsage = dir.getMaxUsedSizeInBytes(); |
| dir.resetMaxUsedSizeInBytes(); |
| writer.optimize(); |
| writer.close(); |
| |
| IndexReader.open(dir, true).close(); |
| |
| long endDiskUsage = dir.getMaxUsedSizeInBytes(); |
| |
| // Ending index is 50X as large as starting index; due |
| // to 3X disk usage normally we allow 150X max |
| // transient usage. If something is wrong w/ deleter |
| // and it doesn't delete intermediate segments then it |
| // will exceed this 150X: |
| // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage); |
| assertTrue("writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage*150), |
| midDiskUsage < 150*startDiskUsage); |
| assertTrue("writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage*150), |
| endDiskUsage < 150*startDiskUsage); |
| dir.close(); |
| } |
| |
| |
| /* |
| * Verify that calling optimize when writer is open for |
| * "commit on close" works correctly both for rollback() |
| * and close(). |
| */ |
| public void testCommitOnCloseOptimize() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| // Must disable throwing exc on double-write: this |
| // test uses IW.rollback which easily results in |
| // writing to same file more than once |
| dir.setPreventDoubleWrite(false); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(10). |
| setMergePolicy(newLogMergePolicy(10)) |
| ); |
| for(int j=0;j<17;j++) { |
| addDocWithIndex(writer, j); |
| } |
| writer.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); |
| writer.optimize(); |
| |
| if (VERBOSE) { |
| writer.setInfoStream(System.out); |
| } |
| |
| // Open a reader before closing (committing) the writer: |
| IndexReader reader = IndexReader.open(dir, true); |
| |
| // Reader should see index as unoptimized at this |
| // point: |
| assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized()); |
| reader.close(); |
| |
| // Abort the writer: |
| writer.rollback(); |
| assertNoUnreferencedFiles(dir, "aborted writer after optimize"); |
| |
| // Open a reader after aborting writer: |
| reader = IndexReader.open(dir, true); |
| |
| // Reader should still see index as unoptimized: |
| assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized()); |
| reader.close(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: do real optimize"); |
| } |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); |
| if (VERBOSE) { |
| writer.setInfoStream(System.out); |
| } |
| writer.optimize(); |
| writer.close(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: writer closed"); |
| } |
| assertNoUnreferencedFiles(dir, "writer closed after optimize"); |
| |
| // Open a reader after closing the writer: |
| reader = IndexReader.open(dir, true); |
| |
| // Reader should now see the index as optimized: |
| assertTrue("Reader incorrectly sees that the index is unoptimized", reader.isOptimized()); |
| reader.close(); |
| dir.close(); |
| } |
| |
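| // Committing a writer that never added a document must still produce a |
| // valid, empty index, both on the initial create and on append. |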
| public void testIndexNoDocuments() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| writer.commit(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.maxDoc()); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); |
| writer.commit(); |
| writer.close(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.maxDoc()); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
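| // Index 100 docs, each with its own set of field names, and verify every |
| // per-field term is searchable with docFreq == 1. |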
| public void testManyFields() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); |
| for(int j=0;j<100;j++) { |
| Document doc = new Document(); |
| doc.add(newField("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("b"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(100, reader.maxDoc()); |
| assertEquals(100, reader.numDocs()); |
| for(int j=0;j<100;j++) { |
| assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j))); |
| assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j))); |
| assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j))); |
| assertEquals(1, reader.docFreq(new Term("d"+j, "aaa"))); |
| assertEquals(1, reader.docFreq(new Term("e"+j, "aaa"))); |
| assertEquals(1, reader.docFreq(new Term("f"+j, "aaa"))); |
| } |
| reader.close(); |
| dir.close(); |
| } |
| |
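| // With a tiny RAM buffer, every added document should force a flush and |
| // thus create new files in the directory. |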
| public void testSmallRAMBuffer() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setRAMBufferSizeMB(0.000001). |
| setMergePolicy(newLogMergePolicy(10)) |
| ); |
| int lastNumFile = dir.listAll().length; |
| for(int j=0;j<9;j++) { |
| Document doc = new Document(); |
| doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| int numFile = dir.listAll().length; |
| // Verify that with a tiny RAM buffer we see new |
| // segment after every doc |
| assertTrue(numFile > lastNumFile); |
| lastNumFile = numFile; |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| // Make sure it's OK to change RAM buffer size and |
| // maxBufferedDocs in a write session |
| public void testChangingRAMBuffer() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| |
| int lastFlushCount = -1; |
| for(int j=1;j<52;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| _TestUtil.syncConcurrentMerges(writer); |
| int flushCount = writer.getFlushCount(); |
| if (j == 1) |
| lastFlushCount = flushCount; |
| else if (j < 10) |
| // No new flushes should have occurred |
| assertEquals(flushCount, lastFlushCount); |
| else if (10 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| writer.getConfig().setRAMBufferSizeMB(0.000001); |
| writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (j < 20) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (20 == j) { |
| writer.getConfig().setRAMBufferSizeMB(16); |
| writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 30) { |
| assertEquals(flushCount, lastFlushCount); |
| } else if (30 == j) { |
| writer.getConfig().setRAMBufferSizeMB(0.000001); |
| writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (j < 40) { |
| assertTrue(flushCount> lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (40 == j) { |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 50) { |
| assertEquals(flushCount, lastFlushCount); |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (50 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
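| // Like testChangingRAMBuffer, but exercises maxBufferedDeleteTerms and the |
| // RAM buffer while buffering deletes instead of added documents. |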
| public void testChangingRAMBuffer2() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setMaxBufferedDeleteTerms(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| |
| for(int j=1;j<52;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| int lastFlushCount = -1; |
| for(int j=1;j<52;j++) { |
| writer.deleteDocuments(new Term("field", "aaa" + j)); |
| _TestUtil.syncConcurrentMerges(writer); |
| int flushCount = writer.getFlushCount(); |
| |
| if (j == 1) |
| lastFlushCount = flushCount; |
| else if (j < 10) { |
| // No new flushes should have occurred |
| assertEquals(flushCount, lastFlushCount); |
| } else if (10 == j) { |
| assertTrue("" + j, flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| writer.getConfig().setRAMBufferSizeMB(0.000001); |
| writer.getConfig().setMaxBufferedDeleteTerms(1); |
| } else if (j < 20) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (20 == j) { |
| writer.getConfig().setRAMBufferSizeMB(16); |
| writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 30) { |
| assertEquals(flushCount, lastFlushCount); |
| } else if (30 == j) { |
| writer.getConfig().setRAMBufferSizeMB(0.000001); |
| writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| writer.getConfig().setMaxBufferedDeleteTerms(1); |
| } else if (j < 40) { |
| assertTrue(flushCount> lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (40 == j) { |
| writer.getConfig().setMaxBufferedDeleteTerms(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 50) { |
| assertEquals(flushCount, lastFlushCount); |
| writer.getConfig().setMaxBufferedDeleteTerms(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (50 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
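| // Stress the in-memory indexing structures with three kinds of docs: many |
| // unique terms, one repeated term, and very long terms. |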
| public void testDiverseDocs() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5)); |
| for(int i=0;i<3;i++) { |
| // First, docs where every term is unique (heavy on |
| // Posting instances) |
| for(int j=0;j<100;j++) { |
| Document doc = new Document(); |
| for(int k=0;k<100;k++) { |
| doc.add(newField("field", Integer.toString(random.nextInt()), Field.Store.YES, Field.Index.ANALYZED)); |
| } |
| writer.addDocument(doc); |
| } |
| |
| // Next, many single term docs where only one term |
| // occurs (heavy on byte blocks) |
| for(int j=0;j<100;j++) { |
| Document doc = new Document(); |
| doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| |
| // Next, many single term docs where only one term |
| // occurs but the terms are very long (heavy on |
| // char[] arrays) |
| for(int j=0;j<100;j++) { |
| StringBuilder b = new StringBuilder(); |
| String x = Integer.toString(j) + "."; |
| for(int k=0;k<1000;k++) |
| b.append(x); |
| String longTerm = b.toString(); |
| |
| Document doc = new Document(); |
| doc.add(newField("field", longTerm, Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| } |
| writer.close(); |
| |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1000).scoreDocs; |
| assertEquals(300, hits.length); |
| searcher.close(); |
| |
| dir.close(); |
| } |
| |
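| // Mix docs that omit norms with a single doc that keeps them, both before |
| // and after a flush, and make sure all docs remain searchable. |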
| public void testEnablingNorms() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); |
| // Enable norms for only 1 doc, pre flush |
| for(int j=0;j<10;j++) { |
| Document doc = new Document(); |
| Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); |
| if (j != 8) { |
| f.setOmitNorms(true); |
| } |
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("field", "aaa"); |
| |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals(10, hits.length); |
| searcher.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); |
| // Enable norms for only 1 doc, post flush |
| for(int j=0;j<27;j++) { |
| Document doc = new Document(); |
| Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); |
| if (j != 26) { |
| f.setOmitNorms(true); |
| } |
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| searcher = new IndexSearcher(dir, false); |
| hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals(27, hits.length); |
| searcher.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| reader.close(); |
| |
| dir.close(); |
| } |
| |
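| // A single doc containing 128K occurrences of one term should index |
| // correctly and report the full within-doc frequency via DocsEnum. |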
| public void testHighFreqTerm() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01)); |
| // Massive doc that has 128 K a's |
| StringBuilder b = new StringBuilder(1024*1024); |
| for(int i=0;i<4096;i++) { |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| } |
| Document doc = new Document(); |
| doc.add(newField("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(1, reader.maxDoc()); |
| assertEquals(1, reader.numDocs()); |
| Term t = new Term("field", "a"); |
| assertEquals(1, reader.docFreq(t)); |
| DocsEnum td = MultiFields.getTermDocsEnum(reader, |
| MultiFields.getDeletedDocs(reader), |
| "field", |
| new BytesRef("a")); |
| td.nextDoc(); |
| assertEquals(128*1024, td.freq()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // Make sure that a Directory implementation that does |
| // not use LockFactory at all (ie overrides makeLock and |
| // implements its own private locking) works OK. This |
| // was raised on java-dev as loss of backwards |
| // compatibility. |
| public void testNullLockFactory() throws IOException { |
| |
| final class MyRAMDirectory extends MockDirectoryWrapper { |
| private LockFactory myLockFactory; |
| MyRAMDirectory(Directory delegate) { |
| super(random, delegate); |
| lockFactory = null; |
| myLockFactory = new SingleInstanceLockFactory(); |
| } |
| @Override |
| public Lock makeLock(String name) { |
| return myLockFactory.makeLock(name); |
| } |
| } |
| |
| Directory dir = new MyRAMDirectory(new RAMDirectory()); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| for (int i = 0; i < 100; i++) { |
| addDoc(writer); |
| } |
| writer.close(); |
| Term searchTerm = new Term("content", "aaa"); |
| IndexSearcher searcher = new IndexSearcher(dir, false); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; |
| assertEquals("did not get right number of hits", 100, hits.length); |
| searcher.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setOpenMode(OpenMode.CREATE)); |
| writer.close(); |
| searcher.close(); |
| dir.close(); |
| } |
| |
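| // Flushing with triggerMerge=false must not merge: 19 docs with |
| // maxBufferedDocs=2 should leave exactly 10 segments. |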
| public void testFlushWithNoMerging() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(2). |
| setMergePolicy(newLogMergePolicy(10)) |
| ); |
| Document doc = new Document(); |
| doc.add(newField("field", "aaa", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| for(int i=0;i<19;i++) |
| writer.addDocument(doc); |
| writer.flush(false, true); |
| writer.close(); |
| SegmentInfos sis = new SegmentInfos(); |
| sis.read(dir); |
| // Since we flushed w/o allowing merging we should now |
| // have 10 segments |
| assertEquals(10, sis.size()); |
| dir.close(); |
| } |
| |
| // Make sure we can flush segment w/ norms, then add |
| // empty doc (no norms) and flush |
| public void testEmptyDocAfterFlushingRealDoc() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| Document doc = new Document(); |
| doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.commit(); |
| if (VERBOSE) { |
| System.out.println("\nTEST: now add empty doc"); |
| } |
| writer.addDocument(new Document()); |
| writer.close(); |
| _TestUtil.checkIndex(dir); |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(2, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // Test calling optimize(false) whereby optimize is kicked |
| // off, but we don't wait for it to finish (though |
| // writer.close() does wait) |
| public void testBackgroundOptimize() throws IOException { |
| |
| Directory dir = newDirectory(); |
| for(int pass=0;pass<2;pass++) { |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setOpenMode(OpenMode.CREATE). |
| setMaxBufferedDocs(2). |
| setMergePolicy(newLogMergePolicy(51)) |
| ); |
| Document doc = new Document(); |
| doc.add(newField("field", "aaa", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| for(int i=0;i<100;i++) |
| writer.addDocument(doc); |
| writer.optimize(false); |
| |
| if (0 == pass) { |
| writer.close(); |
| IndexReader reader = IndexReader.open(dir, true); |
| assertTrue(reader.isOptimized()); |
| reader.close(); |
| } else { |
| // Get another segment to flush so we can verify it is |
| // NOT included in the optimization |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertTrue(!reader.isOptimized()); |
| reader.close(); |
| |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(dir); |
| assertEquals(2, infos.size()); |
| } |
| } |
| |
| dir.close(); |
| } |
| |
| /** |
| * Test that no NullPointerException will be raised, |
| * when adding one document with a single, empty field |
| * and term vectors enabled. |
| * @throws IOException |
| * |
| */ |
| public void testBadSegment() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| |
| Document document = new Document(); |
| document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); |
| iw.addDocument(document); |
| iw.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1036 |
| public void testMaxThreadPriority() throws IOException { |
| int pri = Thread.currentThread().getPriority(); |
| try { |
| Directory dir = newDirectory(); |
| IndexWriterConfig conf = newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); |
| ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); |
| IndexWriter iw = new IndexWriter(dir, conf); |
| Document document = new Document(); |
| document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| Thread.currentThread().setPriority(Thread.MAX_PRIORITY); |
| for(int i=0;i<4;i++) |
| iw.addDocument(document); |
| iw.close(); |
| dir.close(); |
| } finally { |
| Thread.currentThread().setPriority(pri); |
| } |
| } |
| |
| // Just intercepts all merges & verifies that we are never |
| // merging a segment with >= 20 (maxMergeDocs) docs |
| private class MyMergeScheduler extends MergeScheduler { |
| @Override |
| synchronized public void merge(IndexWriter writer) |
| throws CorruptIndexException, IOException { |
| |
| while(true) { |
| MergePolicy.OneMerge merge = writer.getNextMerge(); |
| if (merge == null) { |
| break; |
| } |
| for(int i=0;i<merge.segments.size();i++) { |
| assert merge.segments.get(i).docCount < 20; |
| } |
| writer.merge(merge); |
| } |
| } |
| |
| @Override |
| public void close() {} |
| } |
| |
| // LUCENE-1013 |
| public void testSetMaxMergeDocs() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig conf = newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setMergeScheduler(new MyMergeScheduler()).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); |
| LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); |
| lmp.setMaxMergeDocs(20); |
| lmp.setMergeFactor(2); |
| IndexWriter iw = new IndexWriter(dir, conf); |
| iw.setInfoStream(VERBOSE ? System.out : null); |
| Document document = new Document(); |
| document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, |
| Field.TermVector.YES)); |
| for(int i=0;i<177;i++) |
| iw.addDocument(document); |
| iw.close(); |
| dir.close(); |
| } |
| |
| |
| |
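| // Repeatedly rebuild docs whose set of fields changes from iteration to |
| // iteration, with deletes and periodic optimize, to exercise merging of |
| // segments whose field schemas differ. |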
| public void testVariableSchema() throws Exception { |
| Directory dir = newDirectory(); |
| int delID = 0; |
| for(int i=0;i<20;i++) { |
| if (VERBOSE) { |
| System.out.println("TEST: iter=" + i); |
| } |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| //lmp.setMergeFactor(2); |
| //lmp.setUseCompoundFile(false); |
| Document doc = new Document(); |
| String contents = "aa bb cc dd ee ff gg hh ii jj kk"; |
| |
| if (i == 7) { |
| // Add empty docs here |
| doc.add(newField("content3", "", Field.Store.NO, |
| Field.Index.ANALYZED)); |
| } else { |
| Field.Store storeVal; |
| if (i%2 == 0) { |
| doc.add(newField("content4", contents, Field.Store.YES, |
| Field.Index.ANALYZED)); |
| storeVal = Field.Store.YES; |
| } else |
| storeVal = Field.Store.NO; |
| doc.add(newField("content1", contents, storeVal, |
| Field.Index.ANALYZED)); |
| doc.add(newField("content3", "", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| doc.add(newField("content5", "", storeVal, |
| Field.Index.ANALYZED)); |
| } |
| |
| for(int j=0;j<4;j++) |
| writer.addDocument(doc); |
| |
| writer.close(); |
| IndexReader reader = IndexReader.open(dir, false); |
| reader.deleteDocument(delID++); |
| reader.close(); |
| |
| if (0 == i % 4) { |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| //lmp2.setUseCompoundFile(false); |
| writer.optimize(); |
| writer.close(); |
| } |
| } |
| dir.close(); |
| } |
| |
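| // close(false) must abort running merges yet still leave a consistent, |
| // openable index, even while another thread keeps adding documents. |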
| public void testNoWaitClose() throws Throwable { |
| Directory directory = newDirectory(); |
| |
| final Document doc = new Document(); |
| Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); |
| doc.add(idField); |
| |
| for(int pass=0;pass<2;pass++) { |
| if (VERBOSE) { |
| System.out.println("TEST: pass=" + pass); |
| } |
| |
| IndexWriterConfig conf = newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setOpenMode(OpenMode.CREATE). |
| setMaxBufferedDocs(2). |
| setMergePolicy(newLogMergePolicy()); |
| if (pass == 1) { |
| conf.setMergeScheduler(new SerialMergeScheduler()); |
| } |
| |
| IndexWriter writer = new IndexWriter(directory, conf); |
| ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| |
| for(int iter=0;iter<10;iter++) { |
| if (VERBOSE) { |
| System.out.println("TEST: iter=" + iter); |
| } |
| for(int j=0;j<199;j++) { |
| idField.setValue(Integer.toString(iter*201+j)); |
| writer.addDocument(doc); |
| } |
| |
| int delID = iter*199; |
| for(int j=0;j<20;j++) { |
| writer.deleteDocuments(new Term("id", Integer.toString(delID))); |
| delID += 5; |
| } |
| |
| // Force a bunch of merge threads to kick off so we |
| // stress out aborting them on close: |
| ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2); |
| |
| final IndexWriter finalWriter = writer; |
| final ArrayList<Throwable> failure = new ArrayList<Throwable>(); |
| Thread t1 = new Thread() { |
| @Override |
| public void run() { |
| boolean done = false; |
| while(!done) { |
| for(int i=0;i<100;i++) { |
| try { |
| finalWriter.addDocument(doc); |
| } catch (AlreadyClosedException e) { |
| done = true; |
| break; |
| } catch (NullPointerException e) { |
| done = true; |
| break; |
| } catch (Throwable e) { |
| e.printStackTrace(System.out); |
| failure.add(e); |
| done = true; |
| break; |
| } |
| } |
| Thread.yield(); |
| } |
| |
| } |
| }; |
| |
| t1.start(); |
| |
| writer.close(false); |
| t1.join(); |
| |
| // Surface any unexpected failure from the indexing thread: |
| if (failure.size() > 0) { |
| throw failure.get(0); |
| } |
| |
| // Make sure reader can read |
| IndexReader reader = IndexReader.open(directory, true); |
| reader.close(); |
| |
| // Reopen |
| writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| } |
| writer.close(); |
| } |
| |
| directory.close(); |
| } |
| |
| |
| // LUCENE-1084: test unlimited field length |
| public void testUnlimitedMaxFieldLength() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| |
| Document doc = new Document(); |
| StringBuilder b = new StringBuilder(); |
| for(int i=0;i<10000;i++) |
| b.append(" a"); |
| b.append(" x"); |
| doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| Term t = new Term("field", "x"); |
| assertEquals(1, reader.docFreq(t)); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1044: test writer.commit() when autoCommit=false |
| public void testForceCommit() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(2). |
| setMergePolicy(newLogMergePolicy(5)) |
| ); |
| writer.commit(); |
| |
| for (int i = 0; i < 23; i++) |
| addDoc(writer); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| writer.commit(); |
| IndexReader reader2 = reader.reopen(); |
| assertEquals(0, reader.numDocs()); |
| assertEquals(23, reader2.numDocs()); |
| reader.close(); |
| |
| for (int i = 0; i < 17; i++) |
| addDoc(writer); |
| assertEquals(23, reader2.numDocs()); |
| reader2.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| writer.commit(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(40, reader.numDocs()); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-325: test expungeDeletes, when 2 singular merges |
| // are required |
| public void testExpungeDeletes() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setMaxBufferedDocs(2).setRAMBufferSizeMB( |
| IndexWriterConfig.DISABLE_AUTO_FLUSH)); |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| Document document = new Document(); |
| |
| document = new Document(); |
| Field storedField = newField("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| Field termVectorField = newField("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<10;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| IndexReader ir = IndexReader.open(dir, false); |
| assertEquals(10, ir.maxDoc()); |
| assertEquals(10, ir.numDocs()); |
| ir.deleteDocument(0); |
| ir.deleteDocument(7); |
| assertEquals(8, ir.numDocs()); |
| ir.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); |
| assertEquals(8, writer.numDocs()); |
| assertEquals(10, writer.maxDoc()); |
| writer.expungeDeletes(); |
| assertEquals(8, writer.numDocs()); |
| writer.close(); |
| ir = IndexReader.open(dir, true); |
| assertEquals(8, ir.maxDoc()); |
| assertEquals(8, ir.numDocs()); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-325: test expungeDeletes, when many adjacent merges are required |
| public void testExpungeDeletes2() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(2). |
| setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH). |
| setMergePolicy(newLogMergePolicy(50)) |
| ); |
| |
| Document document = new Document(); |
| |
| document = new Document(); |
| Field storedField = newField("stored", "stored", Store.YES, |
| Index.NO); |
| document.add(storedField); |
| Field termVectorField = newField("termVector", "termVector", |
| Store.NO, Index.NOT_ANALYZED, |
| TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<98;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| IndexReader ir = IndexReader.open(dir, false); |
| assertEquals(98, ir.maxDoc()); |
| assertEquals(98, ir.numDocs()); |
| for(int i=0;i<98;i+=2) |
| ir.deleteDocument(i); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| |
| writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMergePolicy(newLogMergePolicy(3)) |
| ); |
| assertEquals(49, writer.numDocs()); |
| writer.expungeDeletes(); |
| writer.close(); |
| ir = IndexReader.open(dir, true); |
| assertEquals(49, ir.maxDoc()); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-325: test expungeDeletes without waiting, when |
| // many adjacent merges are required |
| public void testExpungeDeletes3() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(2). |
| setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH). |
| setMergePolicy(newLogMergePolicy(50)) |
| ); |
| |
| Document document = new Document(); |
| |
| document = new Document(); |
| Field storedField = newField("stored", "stored", Field.Store.YES, |
| Field.Index.NO); |
| document.add(storedField); |
| Field termVectorField = newField("termVector", "termVector", |
| Field.Store.NO, Field.Index.NOT_ANALYZED, |
| Field.TermVector.WITH_POSITIONS_OFFSETS); |
| document.add(termVectorField); |
| for(int i=0;i<98;i++) |
| writer.addDocument(document); |
| writer.close(); |
| |
| IndexReader ir = IndexReader.open(dir, false); |
| assertEquals(98, ir.maxDoc()); |
| assertEquals(98, ir.numDocs()); |
| for(int i=0;i<98;i+=2) |
| ir.deleteDocument(i); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| |
| writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMergePolicy(newLogMergePolicy(3)) |
| ); |
| writer.expungeDeletes(false); |
| writer.close(); |
| ir = IndexReader.open(dir, true); |
| assertEquals(49, ir.maxDoc()); |
| assertEquals(49, ir.numDocs()); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1179 |
| public void testEmptyFieldName() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| Document doc = new Document(); |
| doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| writer.close(); |
| dir.close(); |
| } |
| |
| |
| |
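| // IndexWriter subclass that records whether doBeforeFlush/doAfterFlush were invoked. |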
| private static final class MockIndexWriter extends IndexWriter { |
| |
| public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException { |
| super(dir, conf); |
| } |
| |
| boolean afterWasCalled; |
| boolean beforeWasCalled; |
| |
| @Override |
| public void doAfterFlush() { |
| afterWasCalled = true; |
| } |
| |
| @Override |
| protected void doBeforeFlush() throws IOException { |
| beforeWasCalled = true; |
| } |
| } |
| |
| |
| // LUCENE-1222 |
| public void testDoBeforeAfterFlush() throws IOException { |
| Directory dir = newDirectory(); |
| MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| Document doc = new Document(); |
| doc.add(newField("field", "a field", Field.Store.YES, |
| Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| w.commit(); |
| assertTrue(w.beforeWasCalled); |
| assertTrue(w.afterWasCalled); |
| w.beforeWasCalled = false; |
| w.afterWasCalled = false; |
| w.deleteDocuments(new Term("field", "field")); |
| w.commit(); |
| assertTrue(w.beforeWasCalled); |
| assertTrue(w.afterWasCalled); |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| assertEquals(0, ir.numDocs()); |
| ir.close(); |
| |
| dir.close(); |
| } |
| |
| |
| |
| final String[] utf8Data = new String[] { |
| // unpaired low surrogate |
| "ab\udc17cd", "ab\ufffdcd", |
| "\udc17abcd", "\ufffdabcd", |
| "\udc17", "\ufffd", |
| "ab\udc17\udc17cd", "ab\ufffd\ufffdcd", |
| "\udc17\udc17abcd", "\ufffd\ufffdabcd", |
| "\udc17\udc17", "\ufffd\ufffd", |
| |
| // unpaired high surrogate |
| "ab\ud917cd", "ab\ufffdcd", |
| "\ud917abcd", "\ufffdabcd", |
| "\ud917", "\ufffd", |
| "ab\ud917\ud917cd", "ab\ufffd\ufffdcd", |
| "\ud917\ud917abcd", "\ufffd\ufffdabcd", |
| "\ud917\ud917", "\ufffd\ufffd", |
| |
| // backwards surrogates |
| "ab\udc17\ud917cd", "ab\ufffd\ufffdcd", |
| "\udc17\ud917abcd", "\ufffd\ufffdabcd", |
| "\udc17\ud917", "\ufffd\ufffd", |
| "ab\udc17\ud917\udc17\ud917cd", "ab\ufffd\ud917\udc17\ufffdcd", |
| "\udc17\ud917\udc17\ud917abcd", "\ufffd\ud917\udc17\ufffdabcd", |
| "\udc17\ud917\udc17\ud917", "\ufffd\ud917\udc17\ufffd" |
| }; |
| |
| // LUCENE-510 |
| public void testInvalidUTF16() throws Throwable { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new StringSplitAnalyzer())); |
| Document doc = new Document(); |
| |
| final int count = utf8Data.length/2; |
| for(int i=0;i<count;i++) |
| doc.add(newField("f" + i, utf8Data[2*i], Field.Store.YES, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| Document doc2 = ir.document(0); |
| for(int i=0;i<count;i++) { |
| assertEquals("field " + i + " was not indexed correctly", 1, ir.docFreq(new Term("f"+i, utf8Data[2*i+1]))); |
| assertEquals("field " + i + " is incorrect", utf8Data[2*i+1], doc2.getField("f"+i).stringValue()); |
| } |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-510 |
| public void testAllUnicodeChars() throws Throwable { |
| |
| BytesRef utf8 = new BytesRef(10); |
| CharsRef utf16 = new CharsRef(10); |
| char[] chars = new char[2]; |
| for(int ch=0;ch<0x0010FFFF;ch++) { |
| |
| if (ch == 0xd800) |
| // Skip invalid code points |
| ch = 0xe000; |
| |
| int len = 0; |
| if (ch <= 0xffff) { |
| chars[len++] = (char) ch; |
| } else { |
| chars[len++] = (char) (((ch-0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START); |
| chars[len++] = (char) (((ch-0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START); |
| } |
| |
| UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); |
| |
| String s1 = new String(chars, 0, len); |
| String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8"); |
| assertEquals("codepoint " + ch, s1, s2); |
| |
| UnicodeUtil.UTF8toUTF16(utf8.bytes, 0, utf8.length, utf16); |
| assertEquals("codepoint " + ch, s1, new String(utf16.chars, 0, utf16.length)); |
| |
| byte[] b = s1.getBytes("UTF-8"); |
| assertEquals(utf8.length, b.length); |
| for(int j=0;j<utf8.length;j++) |
| assertEquals(utf8.bytes[j], b[j]); |
| } |
| } |
| |
| private int nextInt(int lim) { |
| return random.nextInt(lim); |
| } |
| |
| private int nextInt(int start, int end) { |
| return start + nextInt(end-start); |
| } |
| |
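| // Fills buffer[offset..offset+count) with random UTF-16 data, sometimes |
| // including deliberately unpaired surrogates; expected[] receives what a |
| // correct UTF-16 -> UTF-8 -> UTF-16 round trip should produce (unpaired |
| // surrogates become U+FFFD). Returns true if any illegal sequence was written. |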
| private boolean fillUnicode(char[] buffer, char[] expected, int offset, int count) { |
| final int len = offset + count; |
| boolean hasIllegal = false; |
| |
| if (offset > 0 && buffer[offset] >= 0xdc00 && buffer[offset] < 0xe000) |
| // Don't start in the middle of a valid surrogate pair |
| offset--; |
| |
| for(int i=offset;i<len;i++) { |
| int t = nextInt(6); |
| if (0 == t && i < len-1) { |
| // Make a surrogate pair |
| // High surrogate |
| expected[i] = buffer[i++] = (char) nextInt(0xd800, 0xdc00); |
| // Low surrogate |
| expected[i] = buffer[i] = (char) nextInt(0xdc00, 0xe000); |
| } else if (t <= 1) |
| expected[i] = buffer[i] = (char) nextInt(0x80); |
| else if (2 == t) |
| expected[i] = buffer[i] = (char) nextInt(0x80, 0x800); |
| else if (3 == t) |
| expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); |
| else if (4 == t) |
| expected[i] = buffer[i] = (char) nextInt(0xe000, 0xffff); |
| else if (5 == t && i < len-1) { |
| // Illegal unpaired surrogate |
| if (nextInt(10) == 7) { |
| if (random.nextBoolean()) |
| buffer[i] = (char) nextInt(0xd800, 0xdc00); |
| else |
| buffer[i] = (char) nextInt(0xdc00, 0xe000); |
| expected[i++] = 0xfffd; |
| expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); |
| hasIllegal = true; |
| } else |
| expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); |
| } else { |
| expected[i] = buffer[i] = ' '; |
| } |
| } |
| |
| return hasIllegal; |
| } |
| |
| // LUCENE-510 |
| public void testRandomUnicodeStrings() throws Throwable { |
| char[] buffer = new char[20]; |
| char[] expected = new char[20]; |
| |
| BytesRef utf8 = new BytesRef(20); |
| CharsRef utf16 = new CharsRef(20); |
| |
| int num = 100000 * RANDOM_MULTIPLIER; |
| for (int iter = 0; iter < num; iter++) { |
| boolean hasIllegal = fillUnicode(buffer, expected, 0, 20); |
| |
| UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8); |
| if (!hasIllegal) { |
| byte[] b = new String(buffer, 0, 20).getBytes("UTF-8"); |
| assertEquals(b.length, utf8.length); |
| for(int i=0;i<b.length;i++) |
| assertEquals(b[i], utf8.bytes[i]); |
| } |
| |
| UnicodeUtil.UTF8toUTF16(utf8.bytes, 0, utf8.length, utf16); |
| assertEquals(utf16.length, 20); |
| for(int i=0;i<20;i++) |
| assertEquals(expected[i], utf16.chars[i]); |
| } |
| } |
| |
| // LUCENE-1255 |
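| // A field whose first token has position increment 0 must not produce |
| // negative positions; phrase and span queries should still match. |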
| public void testNegativePositions() throws Throwable { |
| final TokenStream tokens = new TokenStream() { |
| final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| |
| final Iterator<String> terms = Arrays.asList("a","b","c").iterator(); |
| boolean first = true; |
| |
| @Override |
| public boolean incrementToken() { |
| if (!terms.hasNext()) return false; |
| clearAttributes(); |
| termAtt.append(terms.next()); |
| posIncrAtt.setPositionIncrement(first ? 0 : 1); |
| first = false; |
| return true; |
| } |
| }; |
| |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| Document doc = new Document(); |
| doc.add(new Field("field", tokens)); |
| w.addDocument(doc); |
| w.commit(); |
| |
| IndexSearcher s = new IndexSearcher(dir, false); |
| PhraseQuery pq = new PhraseQuery(); |
| pq.add(new Term("field", "a")); |
| pq.add(new Term("field", "b")); |
| pq.add(new Term("field", "c")); |
| ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs; |
| assertEquals(1, hits.length); |
| |
| Query q = new SpanTermQuery(new Term("field", "a")); |
| hits = s.search(q, null, 1000).scoreDocs; |
| assertEquals(1, hits.length); |
| |
| DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(), |
| MultiFields.getDeletedDocs(s.getIndexReader()), |
| "field", |
| new BytesRef("a")); |
| |
| assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertEquals(1, tps.freq()); |
| assertEquals(0, tps.nextPosition()); |
| w.close(); |
| |
| _TestUtil.checkIndex(dir); |
| s.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1274: test writer.prepareCommit() |
| public void testPrepareCommit() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(2). |
| setMergePolicy(newLogMergePolicy(5)) |
| ); |
| writer.commit(); |
| |
| for (int i = 0; i < 23; i++) |
| addDoc(writer); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| |
| writer.prepareCommit(); |
| |
| IndexReader reader2 = IndexReader.open(dir, true); |
| assertEquals(0, reader2.numDocs()); |
| |
| writer.commit(); |
| |
| IndexReader reader3 = reader.reopen(); |
| assertEquals(0, reader.numDocs()); |
| assertEquals(0, reader2.numDocs()); |
| assertEquals(23, reader3.numDocs()); |
| reader.close(); |
| reader2.close(); |
| |
| for (int i = 0; i < 17; i++) |
| addDoc(writer); |
| |
| assertEquals(23, reader3.numDocs()); |
| reader3.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| |
| writer.prepareCommit(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(23, reader.numDocs()); |
| reader.close(); |
| |
| writer.commit(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(40, reader.numDocs()); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1274: test writer.prepareCommit() |
| public void testPrepareCommitRollback() throws IOException { |
| MockDirectoryWrapper dir = newDirectory(); |
| dir.setPreventDoubleWrite(false); |
| |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMaxBufferedDocs(2). |
| setMergePolicy(newLogMergePolicy(5)) |
| ); |
| writer.commit(); |
| |
| for (int i = 0; i < 23; i++) |
| addDoc(writer); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| |
| writer.prepareCommit(); |
| |
| IndexReader reader2 = IndexReader.open(dir, true); |
| assertEquals(0, reader2.numDocs()); |
| |
| writer.rollback(); |
| |
| IndexReader reader3 = reader.reopen(); |
| assertEquals(0, reader.numDocs()); |
| assertEquals(0, reader2.numDocs()); |
| assertEquals(0, reader3.numDocs()); |
| reader.close(); |
| reader2.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| for (int i = 0; i < 17; i++) |
| addDoc(writer); |
| |
| assertEquals(0, reader3.numDocs()); |
| reader3.close(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer.prepareCommit(); |
| |
| reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer.commit(); |
| reader = IndexReader.open(dir, true); |
| assertEquals(17, reader.numDocs()); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1274 |
| public void testPrepareCommitNoChanges() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| writer.prepareCommit(); |
| writer.commit(); |
| writer.close(); |
| |
| IndexReader reader = IndexReader.open(dir, true); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1219 |
| public void testBinaryFieldOffsetLength() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| byte[] b = new byte[50]; |
| for(int i=0;i<50;i++) |
| b[i] = (byte) (i+77); |
| |
| Document doc = new Document(); |
| Field f = new Field("binary", b, 10, 17); |
| byte[] bx = f.getBinaryValue(); |
| assertTrue(bx != null); |
| assertEquals(50, bx.length); |
| assertEquals(10, f.getBinaryOffset()); |
| assertEquals(17, f.getBinaryLength()); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| doc = ir.document(0); |
| f = doc.getField("binary"); |
| b = f.getBinaryValue(); |
| assertTrue(b != null); |
| assertEquals(17, b.length); |
| assertEquals(87, b[0]); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1382 |
| public void testCommitUserData() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); |
| for(int j=0;j<17;j++) |
| addDoc(w); |
| w.close(); |
| |
| assertEquals(0, IndexReader.getCommitUserData(dir).size()); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| // commit(Map) never called for this index |
| assertEquals(0, r.getCommitUserData().size()); |
| r.close(); |
| |
| w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); |
| for(int j=0;j<17;j++) |
| addDoc(w); |
| Map<String,String> data = new HashMap<String,String>(); |
| data.put("label", "test1"); |
| w.commit(data); |
| w.close(); |
| |
| assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); |
| |
| r = IndexReader.open(dir, true); |
| assertEquals("test1", r.getCommitUserData().get("label")); |
| r.close(); |
| |
| w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| w.optimize(); |
| w.close(); |
| |
| assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); |
| |
| dir.close(); |
| } |
| |
| |
| // LUCENE-2529 |
| public void testPositionIncrementGapEmptyField() throws Exception { |
| Directory dir = newDirectory(); |
| MockAnalyzer analyzer = new MockAnalyzer(random); |
| analyzer.setPositionIncrementGap( 100 ); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, analyzer)); |
| Document doc = new Document(); |
| Field f = newField("field", "", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); |
| Field f2 = newField("field", "crunch man", Field.Store.NO, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = IndexReader.open(dir, true); |
| TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); |
| int[] poss = tpv.getTermPositions(0); |
| assertEquals(1, poss.length); |
| assertEquals(100, poss[0]); |
| poss = tpv.getTermPositions(1); |
| assertEquals(1, poss.length); |
| assertEquals(101, poss[0]); |
| r.close(); |
| dir.close(); |
| } |
| |
| |
| // LUCENE-1468 -- make sure opening an IndexWriter with |
| // create=true does not remove non-index files |
| |
| public void testOtherFiles() throws Throwable { |
| Directory dir = newDirectory(); |
| try { |
| // Create my own random file: |
| IndexOutput out = dir.createOutput("myrandomfile"); |
| out.writeByte((byte) 42); |
| out.close(); |
| |
| new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); |
| |
| assertTrue(dir.fileExists("myrandomfile")); |
| } finally { |
| dir.close(); |
| } |
| } |
| |
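| // Make sure addIndexes(IndexReader...) over two readers of the same |
| // underlying index completes without deadlocking and produces the |
| // expected document count. |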
| public void testDeadlock() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); |
| Document doc = new Document(); |
| doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, |
| Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.commit(); |
| // index has 2 segments |
| |
| Directory dir2 = newDirectory(); |
| IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| writer2.addDocument(doc); |
| writer2.close(); |
| |
| IndexReader r1 = IndexReader.open(dir2, true); |
| IndexReader r2 = (IndexReader) r1.clone(); |
| writer.addIndexes(r1, r2); |
| writer.close(); |
| |
| IndexReader r3 = IndexReader.open(dir, true); |
| assertEquals(5, r3.numDocs()); |
| r3.close(); |
| |
| r1.close(); |
| r2.close(); |
| |
| dir2.close(); |
| dir.close(); |
| } |
| |
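| // Indexing thread that repeatedly builds and reopens a small index and may |
| // be interrupted at any time once allowInterrupt is set; the index must |
| // remain intact (CheckIndex passes) and only ThreadInterruptedException |
| // wrapping InterruptedException is expected. |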
| private class IndexerThreadInterrupt extends Thread { |
| volatile boolean failed; |
| volatile boolean finish; |
| |
| volatile boolean allowInterrupt = false; |
| |
| @Override |
| public void run() { |
| // LUCENE-2239: won't work with NIOFS/MMAP |
| Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); |
| IndexWriter w = null; |
| while(!finish) { |
| try { |
| |
| while(!finish) { |
| if (w != null) { |
| w.close(); |
| w = null; |
| } |
| IndexWriterConfig conf = newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2); |
| w = new IndexWriter(dir, conf); |
| |
| Document doc = new Document(); |
| doc.add(newField("field", "some text contents", Field.Store.YES, Field.Index.ANALYZED)); |
| for(int i=0;i<100;i++) { |
| w.addDocument(doc); |
| if (i%10 == 0) { |
| w.commit(); |
| } |
| } |
| w.close(); |
| w = null; |
| _TestUtil.checkIndex(dir); |
| IndexReader.open(dir, true).close(); |
| |
| // Strangely, if we interrupt a thread before |
| // all classes are loaded, the class loader |
| // seems to do scary things with the interrupt |
| // status. In java 1.5, it'll throw an |
| // incorrect ClassNotFoundException. In java |
| // 1.6, it'll silently clear the interrupt. |
| // So, on first iteration through here we |
| // don't open ourselves up for interrupts |
| // until we've done the above loop. |
| allowInterrupt = true; |
| } |
| } catch (ThreadInterruptedException re) { |
| if (VERBOSE) { |
| System.out.println("TEST: got interrupt"); |
| re.printStackTrace(System.out); |
| } |
| Throwable e = re.getCause(); |
| assertTrue(e instanceof InterruptedException); |
| if (finish) { |
| break; |
| } |
| } catch (Throwable t) { |
| System.out.println("FAILED; unexpected exception"); |
| t.printStackTrace(System.out); |
| failed = true; |
| break; |
| } |
| } |
| |
| if (!failed) { |
| // clear interrupt state: |
| Thread.interrupted(); |
| if (w != null) { |
| try { |
| w.rollback(); |
| } catch (IOException ioe) { |
| throw new RuntimeException(ioe); |
| } |
| } |
| |
| try { |
| _TestUtil.checkIndex(dir); |
| } catch (Exception e) { |
| failed = true; |
| System.out.println("CheckIndex FAILED: unexpected exception"); |
| e.printStackTrace(System.out); |
| } |
| try { |
| IndexReader r = IndexReader.open(dir, true); |
| //System.out.println("doc count=" + r.numDocs()); |
| r.close(); |
| } catch (Exception e) { |
| failed = true; |
| System.out.println("IndexReader.open FAILED: unexpected exception"); |
| e.printStackTrace(System.out); |
| } |
| } |
| try { |
| dir.close(); |
| } catch (IOException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| } |
| |
| public void testThreadInterruptDeadlock() throws Exception { |
| IndexerThreadInterrupt t = new IndexerThreadInterrupt(); |
| t.setDaemon(true); |
| t.start(); |
| |
| // Force class loader to load ThreadInterruptedException |
| // up front... else we can see a false failure if 2nd |
| // interrupt arrives while class loader is trying to |
| // init this class (in servicing a first interrupt): |
| assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); |
| |
| // issue 100 interrupts to child thread |
| int i = 0; |
| while(i < 100) { |
| Thread.sleep(10); |
| if (t.allowInterrupt) { |
| i++; |
| t.interrupt(); |
| } |
| if (!t.isAlive()) { |
| break; |
| } |
| } |
| t.finish = true; |
| t.join(); |
| assertFalse(t.failed); |
| } |
| |
| |
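| // Mix binary stored fields that also supply their own TokenStream with |
| // regular string fields, across buffered, flushed and merged segments, |
| // and verify both the stored values and the indexed terms survive. |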
| public void testIndexStoreCombos() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| byte[] b = new byte[50]; |
| for(int i=0;i<50;i++) |
| b[i] = (byte) (i+77); |
| |
| Document doc = new Document(); |
| Field f = new Field("binary", b, 10, 17); |
| f.setTokenStream(new MockTokenizer(new StringReader("doc1field1"), MockTokenizer.WHITESPACE, false)); |
| Field f2 = newField("string", "value", Field.Store.YES,Field.Index.ANALYZED); |
| f2.setTokenStream(new MockTokenizer(new StringReader("doc1field2"), MockTokenizer.WHITESPACE, false)); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| |
| // add a second doc (2 total) to test in-memory merging |
| f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false)); |
| f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false)); |
| w.addDocument(doc); |
| |
| // force segment flush so we can force a segment merge with doc3 later. |
| w.commit(); |
| |
| f.setTokenStream(new MockTokenizer(new StringReader("doc3field1"), MockTokenizer.WHITESPACE, false)); |
| f2.setTokenStream(new MockTokenizer(new StringReader("doc3field2"), MockTokenizer.WHITESPACE, false)); |
| |
| w.addDocument(doc); |
| w.commit(); |
| w.optimize(); // force segment merge. |
| w.close(); |
| |
| IndexReader ir = IndexReader.open(dir, true); |
| doc = ir.document(0); |
| f = doc.getField("binary"); |
| b = f.getBinaryValue(); |
| assertTrue(b != null); |
| assertEquals(17, b.length); |
| assertEquals(87, b[0]); |
| |
| assertTrue(ir.document(0).getFieldable("binary").isBinary()); |
| assertTrue(ir.document(1).getFieldable("binary").isBinary()); |
| assertTrue(ir.document(2).getFieldable("binary").isBinary()); |
| |
| assertEquals("value", ir.document(0).get("string")); |
| assertEquals("value", ir.document(1).get("string")); |
| assertEquals("value", ir.document(2).get("string")); |
| |
| |
| // test that the terms were indexed. |
| assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc1field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc2field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc3field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc1field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc2field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc3field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| |
| ir.close(); |
| dir.close(); |
| |
| } |
| |
| // LUCENE-1727: make sure doc fields are stored in order |
| public void testStoredFieldsOrder() throws Throwable { |
| Directory d = newDirectory(); |
| IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| Document doc = new Document(); |
| doc.add(newField("zzz", "a b c", Field.Store.YES, Field.Index.NO)); |
| doc.add(newField("aaa", "a b c", Field.Store.YES, Field.Index.NO)); |
| doc.add(newField("zzz", "1 2 3", Field.Store.YES, Field.Index.NO)); |
| w.addDocument(doc); |
| IndexReader r = w.getReader(); |
| doc = r.document(0); |
| Iterator<Fieldable> it = doc.getFields().iterator(); |
| assertTrue(it.hasNext()); |
| Field f = (Field) it.next(); |
| assertEquals(f.name(), "zzz"); |
| assertEquals(f.stringValue(), "a b c"); |
| |
| assertTrue(it.hasNext()); |
| f = (Field) it.next(); |
| assertEquals(f.name(), "aaa"); |
| assertEquals(f.stringValue(), "a b c"); |
| |
| assertTrue(it.hasNext()); |
| f = (Field) it.next(); |
| assertEquals(f.name(), "zzz"); |
| assertEquals(f.stringValue(), "1 2 3"); |
| assertFalse(it.hasNext()); |
| r.close(); |
| w.close(); |
| d.close(); |
| } |
| |
| public void testEmbeddedFFFF() throws Throwable { |
| |
| Directory d = newDirectory(); |
| IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| Document doc = new Document(); |
| doc.add(newField("field", "a a\uffffb", Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| doc = new Document(); |
| doc.add(newField("field", "a", Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| IndexReader r = w.getReader(); |
| assertEquals(1, r.docFreq(new Term("field", "a\uffffb"))); |
| r.close(); |
| w.close(); |
| _TestUtil.checkIndex(d); |
| d.close(); |
| } |
| |
| public void testNoDocsIndex() throws Throwable { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); |
| writer.setInfoStream(new PrintStream(bos)); |
| writer.addDocument(new Document()); |
| writer.close(); |
| |
| _TestUtil.checkIndex(dir); |
| dir.close(); |
| } |
| |
| // LUCENE-2095: make sure that, with multiple threads, |
| // commit() doesn't return until all changes are in |
| // fact in the index |
| public void testCommitThreadSafety() throws Throwable { |
| final int NUM_THREADS = 5; |
| final double RUN_SEC = 0.5; |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); |
| _TestUtil.reduceOpenFiles(w.w); |
| w.commit(); |
| final AtomicBoolean failed = new AtomicBoolean(); |
| Thread[] threads = new Thread[NUM_THREADS]; |
| final long endTime = System.currentTimeMillis()+((long) (RUN_SEC*1000)); |
| for(int i=0;i<NUM_THREADS;i++) { |
| final int finalI = i; |
| threads[i] = new Thread() { |
| @Override |
| public void run() { |
| try { |
| final Document doc = new Document(); |
| IndexReader r = IndexReader.open(dir); |
| Field f = newField("f", "", Field.Store.NO, Field.Index.NOT_ANALYZED); |
| doc.add(f); |
| int count = 0; |
| do { |
| if (failed.get()) break; |
| for(int j=0;j<10;j++) { |
| final String s = finalI + "_" + String.valueOf(count++); |
| f.setValue(s); |
| w.addDocument(doc); |
| w.commit(); |
| IndexReader r2 = r.reopen(); |
| assertTrue(r2 != r); |
| r.close(); |
| r = r2; |
| assertEquals("term=f:" + s + "; r=" + r, 1, r.docFreq(new Term("f", s))); |
| } |
| } while(System.currentTimeMillis() < endTime); |
| r.close(); |
| } catch (Throwable t) { |
| failed.set(true); |
| throw new RuntimeException(t); |
| } |
| } |
| }; |
| threads[i].start(); |
| } |
| for(int i=0;i<NUM_THREADS;i++) { |
| threads[i].join(); |
| } |
| assertFalse(failed.get()); |
| w.close(); |
| dir.close(); |
| } |
| |
| // both start & end are inclusive |
| private final int getInt(Random r, int start, int end) { |
| return start + r.nextInt(1+end-start); |
| } |
| |
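| // Verifies that the terms of field "f" come back in increasing order, that |
| // each term was one we actually added, and (when isTop is true) that every |
| // added term is seen; finally re-seeks to each seen term. |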
| private void checkTermsOrder(IndexReader r, Set<String> allTerms, boolean isTop) throws IOException { |
| TermsEnum terms = MultiFields.getFields(r).terms("f").iterator(); |
| |
| BytesRef last = new BytesRef(); |
| |
| Set<String> seenTerms = new HashSet<String>(); |
| |
| while(true) { |
| final BytesRef term = terms.next(); |
| if (term == null) { |
| break; |
| } |
| |
| assertTrue(last.compareTo(term) < 0); |
| last.copy(term); |
| |
| final String s = term.utf8ToString(); |
| assertTrue("term " + termDesc(s) + " was not added to index (count=" + allTerms.size() + ")", allTerms.contains(s)); |
| seenTerms.add(s); |
| } |
| |
| if (isTop) { |
| assertTrue(allTerms.equals(seenTerms)); |
| } |
| |
| // Test seeking: |
| Iterator<String> it = seenTerms.iterator(); |
| while(it.hasNext()) { |
| BytesRef tr = new BytesRef(it.next()); |
| assertEquals("seek failed for term=" + termDesc(tr.utf8ToString()), |
| TermsEnum.SeekStatus.FOUND, |
| terms.seek(tr)); |
| } |
| } |
| |
| private final String asUnicodeChar(char c) { |
| return "U+" + Integer.toHexString(c); |
| } |
| |
| private final String termDesc(String s) { |
| final String s0; |
| assertTrue(s.length() <= 2); |
| if (s.length() == 1) { |
| s0 = asUnicodeChar(s.charAt(0)); |
| } else { |
| s0 = asUnicodeChar(s.charAt(0)) + "," + asUnicodeChar(s.charAt(1)); |
| } |
| return s0; |
| } |
| |
| // Make sure terms, including ones with surrogate pairs, |
| // sort in codepoint sort order by default |
| public void testTermUTF16SortOrder() throws Throwable { |
| Random rnd = random; |
| Directory dir = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(rnd, dir); |
| Document d = new Document(); |
| // Single segment |
| Field f = newField("f", "", Field.Store.NO, Field.Index.NOT_ANALYZED); |
| d.add(f); |
| char[] chars = new char[2]; |
| final Set<String> allTerms = new HashSet<String>(); |
| |
| int num = 200 * RANDOM_MULTIPLIER; |
| for (int i = 0; i < num; i++) { |
| |
| final String s; |
| if (rnd.nextBoolean()) { |
| // Single char |
| if (rnd.nextBoolean()) { |
| // Above surrogates |
| chars[0] = (char) getInt(rnd, 1+UnicodeUtil.UNI_SUR_LOW_END, 0xffff); |
| } else { |
| // Below surrogates |
| chars[0] = (char) getInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START-1); |
| } |
| s = new String(chars, 0, 1); |
| } else { |
| // Surrogate pair |
| chars[0] = (char) getInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END); |
| assertTrue(((int) chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int) chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END); |
| chars[1] = (char) getInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END); |
| s = new String(chars, 0, 2); |
| } |
| allTerms.add(s); |
| f.setValue(s); |
| |
| writer.addDocument(d); |
| |
| if ((1+i) % 42 == 0) { |
| writer.commit(); |
| } |
| } |
| |
| IndexReader r = writer.getReader(); |
| |
| // Test each sub-segment |
| final IndexReader[] subs = r.getSequentialSubReaders(); |
| for(int i=0;i<subs.length;i++) { |
| checkTermsOrder(subs[i], allTerms, false); |
| } |
| checkTermsOrder(r, allTerms, true); |
| |
| // Test multi segment |
| r.close(); |
| |
| writer.optimize(); |
| |
| // Test optimized single segment |
| r = writer.getReader(); |
| checkTermsOrder(r, allTerms, true); |
| r.close(); |
| |
| writer.close(); |
| dir.close(); |
| } |
| |
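| // Index a single document with more than 256 unique terms using a small |
| // term index interval, and verify every term is enumerable and matches |
| // exactly one document. |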
| public void testIndexDivisor() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); |
| config.setTermIndexInterval(2); |
| IndexWriter w = new IndexWriter(dir, config); |
| StringBuilder s = new StringBuilder(); |
| // must be > 256 |
| for(int i=0;i<300;i++) { |
| s.append(' ').append(i); |
| } |
| Document d = new Document(); |
| Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED); |
| d.add(f); |
| w.addDocument(d); |
| |
| IndexReader r = w.getReader().getSequentialSubReaders()[0]; |
| TermsEnum t = r.fields().terms("field").iterator(); |
| int count = 0; |
| while(t.next() != null) { |
| final DocsEnum docs = t.docs(null, null); |
| assertEquals(0, docs.nextDoc()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc()); |
| count++; |
| } |
| assertEquals(300, count); |
| r.close(); |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testDeleteUnusedFiles() throws Exception { |
| for(int iter=0;iter<2;iter++) { |
| Directory dir = newDirectory(); |
| |
| LogMergePolicy mergePolicy = newLogMergePolicy(true); |
| mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS |
| |
| IndexWriter w = new IndexWriter( |
| dir, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). |
| setMergePolicy(mergePolicy) |
| ); |
| Document doc = new Document(); |
| doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| IndexReader r; |
| if (iter == 0) { |
| // use NRT |
| r = w.getReader(); |
| } else { |
| // don't use NRT |
| w.commit(); |
| r = IndexReader.open(dir); |
| } |
| |
| List<String> files = Arrays.asList(dir.listAll()); |
| assertTrue(files.contains("_0.cfs")); |
| w.addDocument(doc); |
| w.optimize(); |
| if (iter == 1) { |
| w.commit(); |
| } |
| IndexReader r2 = r.reopen(); |
| assertTrue(r != r2); |
| files = Arrays.asList(dir.listAll()); |
| assertTrue(files.contains("_0.cfs")); |
| // optimize created this |
| //assertTrue(files.contains("_2.cfs")); |
| w.deleteUnusedFiles(); |
| |
| files = Arrays.asList(dir.listAll()); |
| // r still holds this file open |
| assertTrue(files.contains("_0.cfs")); |
| //assertTrue(files.contains("_2.cfs")); |
| |
| r.close(); |
| if (iter == 0) { |
| // on closing NRT reader, it calls writer.deleteUnusedFiles |
| files = Arrays.asList(dir.listAll()); |
| assertFalse(files.contains("_0.cfs")); |
| } else { |
| // now writer can remove it |
| w.deleteUnusedFiles(); |
| files = Arrays.asList(dir.listAll()); |
| assertFalse(files.contains("_0.cfs")); |
| } |
| //assertTrue(files.contains("_2.cfs")); |
| |
| w.close(); |
| r2.close(); |
| |
| dir.close(); |
| } |
| } |
| |
| public void testDeleteUnusedFiles2() throws Exception { |
| // Validates that iw.deleteUnusedFiles() also deletes unused index commits |
| // in case a deletion policy which holds onto commits is used. |
| Directory dir = newDirectory(); |
| SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setIndexDeletionPolicy(sdp)); |
| |
| // First commit |
| Document doc = new Document(); |
| doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.commit(); |
| assertEquals(1, IndexReader.listCommits(dir).size()); |
| |
| // Keep that commit |
| sdp.snapshot("id"); |
| |
| // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. |
| doc = new Document(); |
| doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| writer.commit(); |
| assertEquals(2, IndexReader.listCommits(dir).size()); |
| |
| // Should delete the unreferenced commit |
| sdp.release("id"); |
| writer.deleteUnusedFiles(); |
| assertEquals(1, IndexReader.listCommits(dir).size()); |
| |
| writer.close(); |
| dir.close(); |
| } |
| |
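| // With doc-count and delete-count auto-flush disabled and a 1.0 MB RAM |
| // buffer, both added documents and buffered delete terms should accumulate |
| // for a while (well over 3000 ops) before RAM usage triggers a flush. |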
| public void testIndexingThenDeleting() throws Exception { |
| final Random r = random; |
| Directory dir = newDirectory(); |
| // note this test explicitly disables payloads |
| final Analyzer analyzer = new Analyzer() { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); |
| } |
| }; |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH)); |
| w.setInfoStream(VERBOSE ? System.out : null); |
| Document doc = new Document(); |
| doc.add(newField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO, Field.Index.ANALYZED)); |
| int num = TEST_NIGHTLY ? 6 * RANDOM_MULTIPLIER : 3 * RANDOM_MULTIPLIER; |
| for (int iter = 0; iter < num; iter++) { |
| int count = 0; |
| |
| final boolean doIndexing = r.nextBoolean(); |
| if (VERBOSE) { |
| System.out.println("TEST: iter doIndexing=" + doIndexing); |
| } |
| if (doIndexing) { |
| // Add docs until a flush is triggered |
| final int startFlushCount = w.getFlushCount(); |
| while(w.getFlushCount() == startFlushCount) { |
| w.addDocument(doc); |
| count++; |
| } |
| } else { |
| // Delete docs until a flush is triggered |
| final int startFlushCount = w.getFlushCount(); |
| while(w.getFlushCount() == startFlushCount) { |
| w.deleteDocuments(new Term("foo", ""+count)); |
| count++; |
| } |
| } |
| assertTrue("flush happened too quickly during " + (doIndexing ? "indexing" : "deleting") + " count=" + count, count > 3000); |
| } |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testNoCommits() throws Exception { |
| // Tests that if we don't call commit(), the directory has 0 commits. This |
| // changed with LUCENE-2386; previously IW would always commit on a fresh |
| // new index. |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); |
| try { |
| IndexReader.listCommits(dir); |
| fail("listCommits should have thrown an exception over empty index"); |
| } catch (IndexNotFoundException e) { |
| // that's expected ! |
| } |
| // Closing with no changes should still create a commit, because it's a new index. |
| writer.close(); |
| assertEquals("expected 1 commit!", 1, IndexReader.listCommits(dir).size()); |
| dir.close(); |
| } |
| |
| public void testEmptyFSDirWithNoLock() throws Exception { |
| // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), |
| // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed |
| // when listAll() was called in IndexFileDeleter. |
| Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory()); |
| new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); |
| dir.close(); |
| } |
| |
| public void testEmptyDirRollback() throws Exception { |
| // Tests that if IW is created over an empty Directory, some documents are |
| // indexed, flushed (but not committed) and then IW rolls back, then no |
| // files are left in the Directory. |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)) |
| .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); |
| String[] files = dir.listAll(); |
| |
| writer.setInfoStream(VERBOSE ? System.out : null); |
| |
| // Creating over empty dir should not create any files, |
| // or, at most the write.lock file |
| final int extraFileCount; |
| if (files.length == 1) { |
| assertTrue(files[0].endsWith("write.lock")); |
| extraFileCount = 1; |
| } else { |
| assertEquals(0, files.length); |
| extraFileCount = 0; |
| } |
| |
| Document doc = new Document(); |
| // create as many files as possible |
| doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| // Adding just one document does not call flush yet. |
| assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length); |
| |
| doc = new Document(); |
| doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| writer.addDocument(doc); |
| |
| // The second document should cause a flush. |
| assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount); |
| |
| // After rollback, IW should remove all files |
| writer.rollback(); |
| assertEquals("no files should exist in the directory after rollback", 0, dir.listAll().length); |
| |
| // Since we rolled-back above, that close should be a no-op |
| writer.close(); |
| assertEquals("expected a no-op close after IW.rollback()", 0, dir.listAll().length); |
| dir.close(); |
| } |
| |
| public void testNoSegmentFile() throws IOException { |
| Directory dir = newDirectory(); |
| dir.setLockFactory(NoLockFactory.getNoLockFactory()); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); |
| |
| Document doc = new Document(); |
| doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); |
| w.addDocument(doc); |
| w.addDocument(doc); |
| IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( |
| TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2) |
| .setOpenMode(OpenMode.CREATE)); |
| |
| w2.close(); |
| // Roll back w to release its resources; if we don't do that, the test fails on Windows |
| w.rollback(); |
| dir.close(); |
| } |
| |
| public void testFutureCommit() throws Exception { |
| Directory dir = newDirectory(); |
| |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); |
| Document doc = new Document(); |
| w.addDocument(doc); |
| |
| // commit to "first" |
| Map<String,String> commitData = new HashMap<String,String>(); |
| commitData.put("tag", "first"); |
| w.commit(commitData); |
| |
| // commit to "second" |
| w.addDocument(doc); |
| commitData.put("tag", "second"); |
| w.commit(commitData); |
| w.close(); |
| |
| // open "first" with IndexWriter |
| IndexCommit commit = null; |
| for(IndexCommit c : IndexReader.listCommits(dir)) { |
| if (c.getUserData().get("tag").equals("first")) { |
| commit = c; |
| break; |
| } |
| } |
| |
| assertNotNull(commit); |
| |
| w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); |
| |
| assertEquals(1, w.numDocs()); |
| |
| // commit IndexWriter to "third" |
| w.addDocument(doc); |
| commitData.put("tag", "third"); |
| w.commit(commitData); |
| w.close(); |
| |
| // make sure "second" commit is still there |
| commit = null; |
| for(IndexCommit c : IndexReader.listCommits(dir)) { |
| if (c.getUserData().get("tag").equals("second")) { |
| commit = c; |
| break; |
| } |
| } |
| |
| assertNotNull(commit); |
| |
| IndexReader r = IndexReader.open(commit, true); |
| assertEquals(2, r.numDocs()); |
| r.close(); |
| |
| // open "second", w/ writeable IndexReader & commit |
| r = IndexReader.open(commit, NoDeletionPolicy.INSTANCE, false); |
| assertEquals(2, r.numDocs()); |
| r.deleteDocument(0); |
| r.deleteDocument(1); |
| commitData.put("tag", "fourth"); |
| r.commit(commitData); |
| r.close(); |
| |
| // make sure "third" commit is still there |
| commit = null; |
| for(IndexCommit c : IndexReader.listCommits(dir)) { |
| if (c.getUserData().get("tag").equals("third")) { |
| commit = c; |
| break; |
| } |
| } |
| assertNotNull(commit); |
| |
| dir.close(); |
| } |
| |
| public void testRandomStoredFields() throws IOException { |
| Directory dir = newDirectory(); |
| Random rand = random; |
| RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20))); |
| //w.w.setInfoStream(System.out); |
| //w.w.setUseCompoundFile(false); |
| if (VERBOSE) { |
| w.w.setInfoStream(System.out); |
| } |
| final int docCount = 200*RANDOM_MULTIPLIER; |
| final int fieldCount = _TestUtil.nextInt(rand, 1, 5); |
| |
| final List<Integer> fieldIDs = new ArrayList<Integer>(); |
| |
| Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); |
| |
| for(int i=0;i<fieldCount;i++) { |
| fieldIDs.add(i); |
| } |
| |
| final Map<String,Document> docs = new HashMap<String,Document>(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: build index docCount=" + docCount); |
| } |
| |
| for(int i=0;i<docCount;i++) { |
| Document doc = new Document(); |
| doc.add(idField); |
| final String id = ""+i; |
| idField.setValue(id); |
| docs.put(id, doc); |
| if (VERBOSE) { |
| System.out.println("TEST: add doc id=" + id); |
| } |
| |
| for(int field: fieldIDs) { |
| final String s; |
| if (rand.nextInt(4) != 3) { |
| s = _TestUtil.randomUnicodeString(rand, 1000); |
| doc.add(newField("f"+field, s, Field.Store.YES, Field.Index.NO)); |
| } else { |
| s = null; |
| } |
| } |
| w.addDocument(doc); |
| if (rand.nextInt(50) == 17) { |
| // mix up the binding of field name -> number every so often |
| Collections.shuffle(fieldIDs); |
| } |
| if (rand.nextInt(5) == 3 && i > 0) { |
| final String delID = ""+rand.nextInt(i); |
| if (VERBOSE) { |
| System.out.println("TEST: delete doc id=" + delID); |
| } |
| w.deleteDocuments(new Term("id", delID)); |
| docs.remove(delID); |
| } |
| } |
| |
| if (VERBOSE) { |
| System.out.println("TEST: " + docs.size() + " docs in index; now load fields"); |
| } |
| if (docs.size() > 0) { |
| String[] idsList = docs.keySet().toArray(new String[docs.size()]); |
| |
| for(int x=0;x<2;x++) { |
| IndexReader r = w.getReader(); |
| IndexSearcher s = newSearcher(r); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: cycle x=" + x + " r=" + r); |
| } |
| |
| for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) { |
| String testID = idsList[rand.nextInt(idsList.length)]; |
| if (VERBOSE) { |
| System.out.println("TEST: test id=" + testID); |
| } |
| TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1); |
| assertEquals(1, hits.totalHits); |
| Document doc = r.document(hits.scoreDocs[0].doc); |
| Document docExp = docs.get(testID); |
| for(int i=0;i<fieldCount;i++) { |
| assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i), doc.get("f"+i)); |
| } |
| } |
| s.close(); |
| r.close(); |
| w.optimize(); |
| } |
| } |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testNoUnwantedTVFiles() throws Exception { |
| |
| Directory dir = newDirectory(); |
| IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy())); |
| ((LogMergePolicy) indexWriter.getConfig().getMergePolicy()).setUseCompoundFile(false); |
| |
| String BIG="alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg"; |
| BIG=BIG+BIG+BIG+BIG; |
| |
| for (int i=0; i<2; i++) { |
| Document doc = new Document(); |
| doc.add(new Field("id", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); |
| doc.add(new Field("str", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| doc.add(new Field("str2", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(new Field("str3", Integer.toString(i)+BIG, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); |
| indexWriter.addDocument(doc); |
| } |
| |
| indexWriter.close(); |
| |
| _TestUtil.checkIndex(dir); |
| |
| assertNoUnreferencedFiles(dir, "no tv files"); |
| String[] files = dir.listAll(); |
| for(String file : files) { |
| assertTrue(!file.endsWith(IndexFileNames.VECTORS_FIELDS_EXTENSION)); |
| assertTrue(!file.endsWith(IndexFileNames.VECTORS_INDEX_EXTENSION)); |
| assertTrue(!file.endsWith(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); |
| } |
| |
| dir.close(); |
| } |
| |
| public void testDeleteAllSlowly() throws Exception { |
| final Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random, dir); |
| final int NUM_DOCS = 1000 * RANDOM_MULTIPLIER; |
| final List<Integer> ids = new ArrayList<Integer>(NUM_DOCS); |
| for(int id=0;id<NUM_DOCS;id++) { |
| ids.add(id); |
| } |
| Collections.shuffle(ids, random); |
| for(int id : ids) { |
| Document doc = new Document(); |
| doc.add(newField("id", ""+id, Field.Index.NOT_ANALYZED)); |
| w.addDocument(doc); |
| } |
| Collections.shuffle(ids, random); |
| int upto = 0; |
| while(upto < ids.size()) { |
| final int left = ids.size() - upto; |
| final int inc = Math.min(left, _TestUtil.nextInt(random, 1, 20)); |
| final int limit = upto + inc; |
| while(upto < limit) { |
| w.deleteDocuments(new Term("id", ""+ids.get(upto++))); |
| } |
| final IndexReader r = w.getReader(); |
| assertEquals(NUM_DOCS - upto, r.numDocs()); |
| r.close(); |
| } |
| |
| w.close(); |
| dir.close(); |
| } |
| |
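| // Analyzer/Tokenizer pair that splits the input only on single spaces, so |
| // arbitrarily long tokens (like the wicked long term below) reach the |
| // indexing chain unmodified. |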
| private static class StringSplitAnalyzer extends Analyzer { |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new StringSplitTokenizer(reader); |
| } |
| } |
| |
| private static class StringSplitTokenizer extends Tokenizer { |
| private final String[] tokens; |
| private int upto = 0; |
| private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| |
| public StringSplitTokenizer(Reader r) { |
| try { |
| final StringBuilder b = new StringBuilder(); |
| final char[] buffer = new char[1024]; |
| int n; |
| while((n = r.read(buffer)) != -1) { |
| b.append(buffer, 0, n); |
| } |
| tokens = b.toString().split(" "); |
| } catch (IOException ioe) { |
| throw new RuntimeException(ioe); |
| } |
| } |
| |
| @Override |
| public final boolean incrementToken() throws IOException { |
| clearAttributes(); |
| if (upto < tokens.length) { |
| termAtt.setEmpty(); |
| termAtt.append(tokens[upto]); |
| upto++; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| } |
| |
| /** |
| * Make sure we skip wicked long terms. |
| */ |
| public void testWickedLongTerm() throws IOException { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random, dir, new StringSplitAnalyzer()); |
| |
| char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8]; |
| Arrays.fill(chars, 'x'); |
| Document doc = new Document(); |
| final String bigTerm = new String(chars); |
| final BytesRef bigTermBytesRef = new BytesRef(bigTerm); |
| |
| // These contents produce a too-long term: |
| String contents = "abc xyz x" + bigTerm + " another term"; |
| doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| |
| // Make sure we can add another normal document |
| doc = new Document(); |
| doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED)); |
| w.addDocument(doc); |
| |
| IndexReader reader = w.getReader(); |
| w.close(); |
| |
| // Make sure all terms < max size were indexed |
| assertEquals(2, reader.docFreq(new Term("content", "abc"))); |
| assertEquals(1, reader.docFreq(new Term("content", "bbb"))); |
| assertEquals(1, reader.docFreq(new Term("content", "term"))); |
| assertEquals(1, reader.docFreq(new Term("content", "another"))); |
| |
| // Make sure position is still incremented when |
| // massive term is skipped: |
| DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another")); |
| assertEquals(0, tps.nextDoc()); |
| assertEquals(1, tps.freq()); |
| assertEquals(3, tps.nextPosition()); |
| |
| // Make sure the doc that has the massive term is in |
| // the index: |
| assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); |
| |
| reader.close(); |
| dir.close(); |
| dir = newDirectory(); |
| |
| // Make sure we can add a document with exactly the |
| // maximum length term, and search on that term: |
| doc = new Document(); |
| Field contentField = new Field("content", "", Field.Store.NO, Field.Index.NOT_ANALYZED); |
| doc.add(contentField); |
| |
| w = new RandomIndexWriter(random, dir); |
| |
| contentField.setValue("other"); |
| w.addDocument(doc); |
| |
| contentField.setValue("term"); |
| w.addDocument(doc); |
| |
| contentField.setValue(bigTerm); |
| w.addDocument(doc); |
| |
| contentField.setValue("zzz"); |
| w.addDocument(doc); |
| |
| reader = w.getReader(); |
| w.close(); |
| assertEquals(1, reader.docFreq(new Term("content", bigTerm))); |
| |
| FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(reader, "content", random.nextBoolean()); |
| assertEquals(5, dti.numOrd()); // +1 for null ord |
| assertEquals(4, dti.size()); |
| assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef())); |
| reader.close(); |
| dir.close(); |
| } |
| } |