| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
package org.apache.lucene.index;

| import java.io.ByteArrayOutputStream; |
| import java.io.Closeable; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| import java.io.PrintWriter; |
| import java.io.StringReader; |
| import java.io.StringWriter; |
| import java.net.URI; |
| import java.nio.file.FileSystem; |
| import java.nio.file.Files; |
| import java.nio.file.NoSuchFileException; |
| import java.nio.file.Path; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Random; |
| import java.util.Set; |
| import java.util.concurrent.CountDownLatch; |
| import java.util.concurrent.Semaphore; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| import java.util.concurrent.atomic.AtomicInteger; |
| import java.util.concurrent.atomic.AtomicLong; |
| import java.util.concurrent.atomic.AtomicReference; |
| import java.util.function.Predicate; |
| import java.util.stream.Collectors; |
| |
| import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CannedTokenStream; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.analysis.MockTokenFilter; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.simpletext.SimpleTextCodec; |
| import org.apache.lucene.document.BinaryDocValuesField; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.LongPoint; |
| import org.apache.lucene.document.NumericDocValuesField; |
| import org.apache.lucene.document.SortedDocValuesField; |
| import org.apache.lucene.document.SortedNumericDocValuesField; |
| import org.apache.lucene.document.SortedSetDocValuesField; |
| import org.apache.lucene.document.StoredField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.mockfile.ExtrasFS; |
| import org.apache.lucene.mockfile.FilterPath; |
| import org.apache.lucene.mockfile.WindowsFS; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.search.PhraseQuery; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.SearcherFactory; |
| import org.apache.lucene.search.SearcherManager; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.BaseDirectoryWrapper; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.store.FilterDirectory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.LockObtainFailedException; |
| import org.apache.lucene.store.MMapDirectory; |
| import org.apache.lucene.store.MockDirectoryWrapper; |
| import org.apache.lucene.store.NIOFSDirectory; |
| import org.apache.lucene.store.NoLockFactory; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.store.SimpleFSDirectory; |
| import org.apache.lucene.store.SimpleFSLockFactory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.Constants; |
| import org.apache.lucene.util.IOSupplier; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.SetOnce; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.TestUtil; |
| import org.apache.lucene.util.ThreadInterruptedException; |
| import org.apache.lucene.util.Version; |
| import org.apache.lucene.util.automaton.Automata; |
| import org.apache.lucene.util.automaton.Automaton; |
| import org.apache.lucene.util.automaton.CharacterRunAutomaton; |
| import org.junit.Ignore; |
| import org.junit.Test; |
| |
| public class TestIndexWriter extends LuceneTestCase { |
| |
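// Shared field type: tokenized and indexed but, despite the name, not stored.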
| private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED); |
| public void testDocCount() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = null; |
| IndexReader reader = null; |
| int i; |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| |
| // add 100 documents |
| for (i = 0; i < 100; i++) { |
addDocWithIndex(writer, i);
| if (random().nextBoolean()) { |
| writer.commit(); |
| } |
| } |
| IndexWriter.DocStats docStats = writer.getDocStats(); |
| assertEquals(100, docStats.maxDoc); |
| assertEquals(100, docStats.numDocs); |
| writer.close(); |
| |
| // delete 40 documents |
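// Keep fully deleted segments so maxDoc stays at 100 even if every document
// in some segment ends up deleted before the flush below.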
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(new FilterMergePolicy(NoMergePolicy.INSTANCE) { |
| @Override |
public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) {
| return true; |
| } |
| })); |
| |
| for (i = 0; i < 40; i++) { |
| writer.deleteDocuments(new Term("id", ""+i)); |
| if (random().nextBoolean()) { |
| writer.commit(); |
| } |
| } |
| writer.flush(); |
| docStats = writer.getDocStats(); |
| assertEquals(100, docStats.maxDoc); |
| assertEquals(60, docStats.numDocs); |
| writer.close(); |
| |
| reader = DirectoryReader.open(dir); |
| assertEquals(60, reader.numDocs()); |
| reader.close(); |
| |
| // merge the index down and check that the new doc count is correct |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| assertEquals(60, writer.getDocStats().numDocs); |
| writer.forceMerge(1); |
| docStats = writer.getDocStats(); |
| assertEquals(60, docStats.maxDoc); |
| assertEquals(60, docStats.numDocs); |
| writer.close(); |
| |
| // check that the index reader gives the same numbers. |
| reader = DirectoryReader.open(dir); |
| assertEquals(60, reader.maxDoc()); |
| assertEquals(60, reader.numDocs()); |
| reader.close(); |
| |
| // make sure opening a new index for create over |
| // this existing one works correctly: |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.CREATE)); |
| docStats = writer.getDocStats(); |
| assertEquals(0, docStats.maxDoc); |
| assertEquals(0, docStats.numDocs); |
| writer.close(); |
| dir.close(); |
| } |
| |
static void addDoc(IndexWriter writer) throws IOException {
| Document doc = new Document(); |
| doc.add(newTextField("content", "aaa", Field.Store.NO)); |
| writer.addDocument(doc); |
| } |
| |
static void addDocWithIndex(IndexWriter writer, int index) throws IOException {
| Document doc = new Document(); |
| doc.add(newField("content", "aaa " + index, storedTextType)); |
| doc.add(newField("id", "" + index, storedTextType)); |
| writer.addDocument(doc); |
| } |
| |
| // TODO: we have the logic in MDW to do this check, and it's better, because it knows about files it tried |
| // to delete but couldn't: we should replace this!!!! |
| public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { |
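// Opening an IndexWriter and rolling back runs IndexFileDeleter, which removes
// any files not referenced by the latest commit; if there were no unreferenced
// files, the directory listing must be unchanged.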
| String[] startFiles = dir.listAll(); |
| new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))).rollback(); |
| String[] endFiles = dir.listAll(); |
| |
| Arrays.sort(startFiles); |
| Arrays.sort(endFiles); |
| |
| if (!Arrays.equals(startFiles, endFiles)) { |
| fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles)); |
| } |
| } |
| |
static String arrayToString(String[] l) {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < l.length; i++) {
    if (i > 0) {
      sb.append("\n ");
    }
    sb.append(l[i]);
  }
  return sb.toString();
}
| |
| // Make sure we can open an index for create even when a |
| // reader holds it open (this fails pre lock-less |
| // commits on windows): |
| public void testCreateWithReader() throws IOException { |
| Directory dir = newDirectory(); |
| |
| // add one document & close writer |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| addDoc(writer); |
| writer.close(); |
| |
| // now open reader: |
| IndexReader reader = DirectoryReader.open(dir); |
assertEquals("should be one document", 1, reader.numDocs());
| |
| // now open index for create: |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.CREATE)); |
assertEquals("should be zero documents", 0, writer.getDocStats().maxDoc);
| addDoc(writer); |
| writer.close(); |
| |
assertEquals("should be one document", 1, reader.numDocs());
IndexReader reader2 = DirectoryReader.open(dir);
assertEquals("should be one document", 1, reader2.numDocs());
| reader.close(); |
| reader2.close(); |
| |
| dir.close(); |
| } |
| |
| public void testChangesAfterClose() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| |
| addDoc(writer); |
| |
| // close |
| writer.close(); |
| expectThrows(AlreadyClosedException.class, () -> { |
| addDoc(writer); |
| }); |
| |
| dir.close(); |
| } |
| |
| |
| |
| public void testIndexNoDocuments() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.commit(); |
| writer.close(); |
| |
| IndexReader reader = DirectoryReader.open(dir); |
| assertEquals(0, reader.maxDoc()); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND)); |
| writer.commit(); |
| writer.close(); |
| |
| reader = DirectoryReader.open(dir); |
| assertEquals(0, reader.maxDoc()); |
| assertEquals(0, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testSmallRAMBuffer() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setRAMBufferSizeMB(0.000001) |
| .setMergePolicy(newLogMergePolicy(10)) |
| ); |
| int lastNumSegments = getSegmentCount(dir); |
| for(int j=0;j<9;j++) { |
| Document doc = new Document(); |
| doc.add(newField("field", "aaa" + j, storedTextType)); |
| writer.addDocument(doc); |
| // Verify that with a tiny RAM buffer we see new |
| // segment after every doc |
| int numSegments = getSegmentCount(dir); |
| assertTrue(numSegments > lastNumSegments); |
| lastNumSegments = numSegments; |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| /** Returns how many unique segment names are in the directory. */ |
| private static int getSegmentCount(Directory dir) throws IOException { |
| Set<String> segments = new HashSet<>(); |
| for(String file : dir.listAll()) { |
| segments.add(IndexFileNames.parseSegmentName(file)); |
| } |
| |
| return segments.size(); |
| } |
| |
| // Make sure it's OK to change RAM buffer size and |
| // maxBufferedDocs in a write session |
| public void testChangingRAMBuffer() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| |
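// The loop exercises live config changes: docs 2-9 buffer under
// maxBufferedDocs=10 and doc 10 flushes; docs 11-19 flush per doc under a tiny
// RAM buffer; docs 21-29 buffer under a 16 MB budget; docs 31-39 flush per doc
// again; docs 41-50 buffer under maxBufferedDocs=10 until doc 50 flushes.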
| int lastFlushCount = -1; |
| for(int j=1;j<52;j++) { |
| Document doc = new Document(); |
| doc.add(new Field("field", "aaa" + j, storedTextType)); |
| writer.addDocument(doc); |
| TestUtil.syncConcurrentMerges(writer); |
| int flushCount = writer.getFlushCount(); |
if (j == 1) {
  lastFlushCount = flushCount;
} else if (j < 10) {
  // No new files should be created
  assertEquals(flushCount, lastFlushCount);
} else if (10 == j) {
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| writer.getConfig().setRAMBufferSizeMB(0.000001); |
| writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (j < 20) { |
| assertTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } else if (20 == j) { |
| writer.getConfig().setRAMBufferSizeMB(16); |
| writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 30) { |
| assertEquals(flushCount, lastFlushCount); |
| } else if (30 == j) { |
| writer.getConfig().setRAMBufferSizeMB(0.000001); |
| writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (j < 40) { |
assertTrue(flushCount > lastFlushCount);
| lastFlushCount = flushCount; |
| } else if (40 == j) { |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } else if (j < 50) { |
| assertEquals(flushCount, lastFlushCount); |
| writer.getConfig().setMaxBufferedDocs(10); |
| writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } else if (50 == j) { |
| assertTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.close(); |
| dir.close(); |
| } |
| |
| public void testEnablingNorms() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(10)); |
| // Enable norms for only 1 doc, pre flush |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setOmitNorms(true); |
| for(int j=0;j<10;j++) { |
| Document doc = new Document(); |
Field f;
if (j != 8) {
  f = newField("field", "aaa", customType);
} else {
  f = newField("field", "aaa", storedTextType);
}
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| Term searchTerm = new Term("field", "aaa"); |
| |
| IndexReader reader = DirectoryReader.open(dir); |
| IndexSearcher searcher = newSearcher(reader); |
| ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs; |
| assertEquals(10, hits.length); |
| reader.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); |
| // Enable norms for only 1 doc, post flush |
| for(int j=0;j<27;j++) { |
| Document doc = new Document(); |
Field f;
if (j != 26) {
  f = newField("field", "aaa", customType);
} else {
  f = newField("field", "aaa", storedTextType);
}
| doc.add(f); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| reader = DirectoryReader.open(dir); |
| searcher = newSearcher(reader); |
| hits = searcher.search(new TermQuery(searchTerm), 1000).scoreDocs; |
| assertEquals(27, hits.length); |
| reader.close(); |
| |
| reader = DirectoryReader.open(dir); |
| reader.close(); |
| |
| dir.close(); |
| } |
| |
| public void testHighFreqTerm() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setRAMBufferSizeMB(0.01)); |
| // Massive doc that has 128 K a's |
| StringBuilder b = new StringBuilder(1024*1024); |
| for(int i=0;i<4096;i++) { |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| b.append(" a a a a a a a a"); |
| } |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| doc.add(newField("field", b.toString(), customType)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = DirectoryReader.open(dir); |
| assertEquals(1, reader.maxDoc()); |
| assertEquals(1, reader.numDocs()); |
| Term t = new Term("field", "a"); |
| assertEquals(1, reader.docFreq(t)); |
| PostingsEnum td = |
| TestUtil.docs(random(), reader, "field", newBytesRef("a"), null, PostingsEnum.FREQS); |
| td.nextDoc(); |
| assertEquals(128*1024, td.freq()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testFlushWithNoMerging() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter( |
| dir, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2) |
| .setMergePolicy(newLogMergePolicy(10)) |
| ); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| doc.add(newField("field", "aaa", customType)); |
| for(int i=0;i<19;i++) |
| writer.addDocument(doc); |
| writer.flush(false, true); |
| writer.close(); |
| SegmentInfos sis = SegmentInfos.readLatestCommit(dir); |
| // Since we flushed w/o allowing merging we should now |
| // have 10 segments |
| assertEquals(10, sis.size()); |
| dir.close(); |
| } |
| |
| // Make sure we can flush segment w/ norms, then add |
| // empty doc (no norms) and flush |
| public void testEmptyDocAfterFlushingRealDoc() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| doc.add(newField("field", "aaa", customType)); |
| writer.addDocument(doc); |
| writer.commit(); |
| if (VERBOSE) { |
| System.out.println("\nTEST: now add empty doc"); |
| } |
| writer.addDocument(new Document()); |
| writer.close(); |
| IndexReader reader = DirectoryReader.open(dir); |
| assertEquals(2, reader.numDocs()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| /** |
| * Test that no NullPointerException will be raised, |
| * when adding one document with a single, empty field |
| * and term vectors enabled. |
| */ |
| public void testBadSegment() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| |
| Document document = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setStoreTermVectors(true); |
| document.add(newField("tvtest", "", customType)); |
| iw.addDocument(document); |
| iw.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1036 |
| public void testMaxThreadPriority() throws IOException { |
| int pri = Thread.currentThread().getPriority(); |
| try { |
| Directory dir = newDirectory(); |
| IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2) |
| .setMergePolicy(newLogMergePolicy()); |
| ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); |
| IndexWriter iw = new IndexWriter(dir, conf); |
| Document document = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setStoreTermVectors(true); |
| document.add(newField("tvtest", "a b c", customType)); |
| Thread.currentThread().setPriority(Thread.MAX_PRIORITY); |
| for(int i=0;i<4;i++) |
| iw.addDocument(document); |
| iw.close(); |
| dir.close(); |
| } finally { |
| Thread.currentThread().setPriority(pri); |
| } |
| } |
| |
| public void testVariableSchema() throws Exception { |
| Directory dir = newDirectory(); |
| for(int i=0;i<20;i++) { |
| if (VERBOSE) { |
| System.out.println("TEST: iter=" + i); |
| } |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2) |
| .setMergePolicy(newLogMergePolicy())); |
| //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| //lmp.setMergeFactor(2); |
| //lmp.setNoCFSRatio(0.0); |
| Document doc = new Document(); |
| String contents = "aa bb cc dd ee ff gg hh ii jj kk"; |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| FieldType type = null; |
| if (i == 7) { |
| // Add empty docs here |
| doc.add(newTextField("content3", "", Field.Store.NO)); |
| } else { |
| if (i%2 == 0) { |
| doc.add(newField("content4", contents, customType)); |
| type = customType; |
| } else |
| type = TextField.TYPE_NOT_STORED; |
| doc.add(newTextField("content1", contents, Field.Store.NO)); |
| doc.add(newField("content3", "", customType)); |
| doc.add(newField("content5", "", type)); |
| } |
| |
| for(int j=0;j<4;j++) |
| writer.addDocument(doc); |
| |
| writer.close(); |
| |
| if (0 == i % 4) { |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| //lmp2.setNoCFSRatio(0.0); |
| writer.forceMerge(1); |
| writer.close(); |
| } |
| } |
| dir.close(); |
| } |
| |
| // LUCENE-1084: test unlimited field length |
| public void testUnlimitedMaxFieldLength() throws IOException { |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| |
| Document doc = new Document(); |
| StringBuilder b = new StringBuilder(); |
| for(int i=0;i<10000;i++) |
| b.append(" a"); |
| b.append(" x"); |
| doc.add(newTextField("field", b.toString(), Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.close(); |
| |
| IndexReader reader = DirectoryReader.open(dir); |
| Term t = new Term("field", "x"); |
| assertEquals(1, reader.docFreq(t)); |
| reader.close(); |
| dir.close(); |
| } |
| |
| |
| |
| // LUCENE-1179 |
| public void testEmptyFieldName() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(newTextField("", "a b c", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.close(); |
| dir.close(); |
| } |
| |
| public void testEmptyFieldNameTerms() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(newTextField("", "a b c", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.close(); |
| DirectoryReader reader = DirectoryReader.open(dir); |
| LeafReader subreader = getOnlyLeafReader(reader); |
| TermsEnum te = subreader.terms("").iterator(); |
| assertEquals(newBytesRef("a"), te.next()); |
| assertEquals(newBytesRef("b"), te.next()); |
| assertEquals(newBytesRef("c"), te.next()); |
| assertNull(te.next()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testEmptyFieldNameWithEmptyTerm() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(newStringField("", "", Field.Store.NO)); |
| doc.add(newStringField("", "a", Field.Store.NO)); |
| doc.add(newStringField("", "b", Field.Store.NO)); |
| doc.add(newStringField("", "c", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.close(); |
| DirectoryReader reader = DirectoryReader.open(dir); |
| LeafReader subreader = getOnlyLeafReader(reader); |
| TermsEnum te = subreader.terms("").iterator(); |
| assertEquals(newBytesRef(""), te.next()); |
| assertEquals(newBytesRef("a"), te.next()); |
| assertEquals(newBytesRef("b"), te.next()); |
| assertEquals(newBytesRef("c"), te.next()); |
| assertNull(te.next()); |
| reader.close(); |
| dir.close(); |
| } |
| |
| |
| |
| private static final class MockIndexWriter extends IndexWriter { |
| |
| public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException { |
| super(dir, conf); |
| } |
| |
| boolean afterWasCalled; |
| boolean beforeWasCalled; |
| |
| @Override |
| public void doAfterFlush() { |
| afterWasCalled = true; |
| } |
| |
| @Override |
| protected void doBeforeFlush() { |
| beforeWasCalled = true; |
| } |
| } |
| |
| |
| // LUCENE-1222 |
| public void testDoBeforeAfterFlush() throws IOException { |
| Directory dir = newDirectory(); |
| MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| doc.add(newField("field", "a field", customType)); |
| w.addDocument(doc); |
| w.commit(); |
| assertTrue(w.beforeWasCalled); |
| assertTrue(w.afterWasCalled); |
| w.beforeWasCalled = false; |
| w.afterWasCalled = false; |
| w.deleteDocuments(new Term("field", "field")); |
| w.commit(); |
| assertTrue(w.beforeWasCalled); |
| assertTrue(w.afterWasCalled); |
| w.close(); |
| |
| IndexReader ir = DirectoryReader.open(dir); |
| assertEquals(0, ir.numDocs()); |
| ir.close(); |
| |
| dir.close(); |
| } |
| |
| // LUCENE-1255 |
| public void testNegativePositions() throws Throwable { |
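// The first token has positionIncrement 0, so its position computes to -1
// (positions start at -1 and advance by each increment); IndexWriter must
// reject the negative position.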
| final TokenStream tokens = new TokenStream() { |
| final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| |
| final Iterator<String> terms = Arrays.asList("a","b","c").iterator(); |
| boolean first = true; |
| |
| @Override |
| public boolean incrementToken() { |
| if (!terms.hasNext()) return false; |
| clearAttributes(); |
| termAtt.append(terms.next()); |
| posIncrAtt.setPositionIncrement(first ? 0 : 1); |
| first = false; |
| return true; |
| } |
| }; |
| |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(new TextField("field", tokens)); |
| expectThrows(IllegalArgumentException.class, () -> { |
| w.addDocument(doc); |
| }); |
| |
| w.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-2529 |
| public void testPositionIncrementGapEmptyField() throws Exception { |
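// Even though the first "field" instance produces no tokens, the analyzer's
// 100-position gap is still applied before the second instance, so "crunch"
// lands at position 100 and "man" at position 101.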
| Directory dir = newDirectory(); |
| MockAnalyzer analyzer = new MockAnalyzer(random()); |
analyzer.setPositionIncrementGap(100);
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer)); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| Field f = newField("field", "", customType); |
| Field f2 = newField("field", "crunch man", customType); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| w.close(); |
| |
| IndexReader r = DirectoryReader.open(dir); |
| Terms tpv = r.getTermVectors(0).terms("field"); |
| TermsEnum termsEnum = tpv.iterator(); |
| assertNotNull(termsEnum.next()); |
| PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL); |
| assertNotNull(dpEnum); |
| assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertEquals(1, dpEnum.freq()); |
| assertEquals(100, dpEnum.nextPosition()); |
| |
| assertNotNull(termsEnum.next()); |
| dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL); |
| assertNotNull(dpEnum); |
| assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| assertEquals(1, dpEnum.freq()); |
| assertEquals(101, dpEnum.nextPosition()); |
| assertNull(termsEnum.next()); |
| |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testDeadlock() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2)); |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| |
| doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.commit(); |
| // index has 2 segments |
| |
| Directory dir2 = newDirectory(); |
| IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer2.addDocument(doc); |
| writer2.close(); |
| |
| DirectoryReader r1 = DirectoryReader.open(dir2); |
| TestUtil.addIndexesSlowly(writer, r1, r1); |
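// r1 holds a single doc and is added twice, so the index now has 3 + 2 = 5 docs.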
| writer.close(); |
| |
| IndexReader r3 = DirectoryReader.open(dir); |
| assertEquals(5, r3.numDocs()); |
| r3.close(); |
| |
| r1.close(); |
| |
| dir2.close(); |
| dir.close(); |
| } |
| |
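// Indexing thread that the test interrupts at random points; it must either
// finish each operation or abort cleanly, always leaving an index that passes
// CheckIndex.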
| private class IndexerThreadInterrupt extends Thread { |
| volatile boolean failed; |
| volatile boolean finish; |
| |
| volatile boolean allowInterrupt = false; |
| final Random random; |
| final Directory adder; |
| final ByteArrayOutputStream bytesLog = new ByteArrayOutputStream(); |
| final PrintStream log = new PrintStream(bytesLog, true, IOUtils.UTF_8); |
| final int id; |
| |
| IndexerThreadInterrupt(int id) throws IOException { |
| this.id = id; |
| this.random = new Random(random().nextLong()); |
| // make a little directory for addIndexes |
| // LUCENE-2239: won't work with NIOFS/MMAP |
| adder = new MockDirectoryWrapper(random, new RAMDirectory()); |
| IndexWriterConfig conf = newIndexWriterConfig(random, new MockAnalyzer(random)); |
| if (conf.getMergeScheduler() instanceof ConcurrentMergeScheduler) { |
| conf.setMergeScheduler(new SuppressingConcurrentMergeScheduler() { |
| @Override |
| protected boolean isOK(Throwable th) { |
| return th instanceof AlreadyClosedException || |
| (th instanceof IllegalStateException && th.getMessage().contains("this writer hit an unrecoverable error")); |
| } |
| }); |
| } |
| IndexWriter w = new IndexWriter(adder, conf); |
| Document doc = new Document(); |
| doc.add(newStringField(random, "id", "500", Field.Store.NO)); |
| doc.add(newField(random, "field", "some prepackaged text contents", storedTextType)); |
| doc.add(new BinaryDocValuesField("binarydv", newBytesRef("500"))); |
| doc.add(new NumericDocValuesField("numericdv", 500)); |
| doc.add(new SortedDocValuesField("sorteddv", newBytesRef("500"))); |
| doc.add(new SortedSetDocValuesField("sortedsetdv", newBytesRef("one"))); |
| doc.add(new SortedSetDocValuesField("sortedsetdv", newBytesRef("two"))); |
| doc.add(new SortedNumericDocValuesField("sortednumericdv", 4)); |
| doc.add(new SortedNumericDocValuesField("sortednumericdv", 3)); |
| w.addDocument(doc); |
| doc = new Document(); |
| doc.add(newStringField(random, "id", "501", Field.Store.NO)); |
| doc.add(newField(random, "field", "some more contents", storedTextType)); |
| doc.add(new BinaryDocValuesField("binarydv", newBytesRef("501"))); |
| doc.add(new NumericDocValuesField("numericdv", 501)); |
| doc.add(new SortedDocValuesField("sorteddv", newBytesRef("501"))); |
| doc.add(new SortedSetDocValuesField("sortedsetdv", newBytesRef("two"))); |
| doc.add(new SortedSetDocValuesField("sortedsetdv", newBytesRef("three"))); |
| doc.add(new SortedNumericDocValuesField("sortednumericdv", 6)); |
| doc.add(new SortedNumericDocValuesField("sortednumericdv", 1)); |
| w.addDocument(doc); |
| w.deleteDocuments(new Term("id", "500")); |
| w.close(); |
| } |
| |
| @Override |
| public void run() { |
| // LUCENE-2239: won't work with NIOFS/MMAP |
| MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory()); |
| |
| // open/close slowly sometimes |
| dir.setUseSlowOpenClosers(true); |
| |
| // throttle a little |
| dir.setThrottling(MockDirectoryWrapper.Throttling.SOMETIMES); |
| |
| IndexWriter w = null; |
| while(!finish) { |
| try { |
| |
| while(!finish) { |
| if (w != null) { |
| // If interrupt arrives inside here, it's |
| // fine: we will cycle back and the first |
| // thing we do is try to close again, |
| // i.e. we'll never try to open a new writer |
| // until this one successfully closes: |
| // w.rollback(); |
| try { |
| w.close(); |
| } catch (AlreadyClosedException ace) { |
| // OK |
| } |
| w = null; |
| } |
| IndexWriterConfig conf = newIndexWriterConfig(random, |
| new MockAnalyzer(random)).setMaxBufferedDocs(2); |
| if (conf.getMergeScheduler() instanceof ConcurrentMergeScheduler) { |
| conf.setMergeScheduler(new SuppressingConcurrentMergeScheduler() { |
| @Override |
| protected boolean isOK(Throwable th) { |
| return th instanceof AlreadyClosedException || |
| (th instanceof IllegalStateException && th.getMessage().contains("this writer hit an unrecoverable error")); |
| } |
| }); |
| } |
| //conf.setInfoStream(log); |
| w = new IndexWriter(dir, conf); |
| |
| Document doc = new Document(); |
| Field idField = newStringField(random, "id", "", Field.Store.NO); |
| Field binaryDVField = new BinaryDocValuesField("binarydv", newBytesRef()); |
| Field numericDVField = new NumericDocValuesField("numericdv", 0); |
| Field sortedDVField = new SortedDocValuesField("sorteddv", newBytesRef()); |
| Field sortedSetDVField = new SortedSetDocValuesField("sortedsetdv", newBytesRef()); |
| doc.add(idField); |
| doc.add(newField(random, "field", "some text contents", storedTextType)); |
| doc.add(binaryDVField); |
| doc.add(numericDVField); |
| doc.add(sortedDVField); |
| doc.add(sortedSetDVField); |
| for(int i=0;i<100;i++) { |
| //log.println("\nTEST: i=" + i); |
| idField.setStringValue(Integer.toString(i)); |
| binaryDVField.setBytesValue(newBytesRef(idField.stringValue())); |
| numericDVField.setLongValue(i); |
| sortedDVField.setBytesValue(newBytesRef(idField.stringValue())); |
| sortedSetDVField.setBytesValue(newBytesRef(idField.stringValue())); |
| int action = random.nextInt(100); |
| if (action == 17) { |
| w.addIndexes(adder); |
| } else if (action%30 == 0) { |
| w.deleteAll(); |
| } else if (action%2 == 0) { |
| w.updateDocument(new Term("id", idField.stringValue()), doc); |
| } else { |
| w.addDocument(doc); |
| } |
| if (random.nextInt(3) == 0) { |
| IndexReader r = null; |
| try { |
| r = DirectoryReader.open(w, random.nextBoolean(), false); |
| if (random.nextBoolean() && r.maxDoc() > 0) { |
| int docid = random.nextInt(r.maxDoc()); |
| w.tryDeleteDocument(r, docid); |
| } |
| } finally { |
| IOUtils.closeWhileHandlingException(r); |
| } |
| } |
| if (i%10 == 0) { |
| w.commit(); |
| } |
| if (random.nextInt(50) == 0) { |
| w.forceMerge(1); |
| } |
| } |
| w.close(); |
| w = null; |
| DirectoryReader.open(dir).close(); |
| |
| // Strangely, if we interrupt a thread before |
| // all classes are loaded, the class loader |
| // seems to do scary things with the interrupt |
| // status. In java 1.5, it'll throw an |
| // incorrect ClassNotFoundException. In java |
| // 1.6, it'll silently clear the interrupt. |
| // So, on first iteration through here we |
| // don't open ourselves up for interrupts |
| // until we've done the above loop. |
| allowInterrupt = true; |
| } |
| } catch (ThreadInterruptedException re) { |
| // NOTE: important to leave this verbosity/noise |
| // on!! This test doesn't repro easily so when |
| // Jenkins hits a fail we need to study where the |
| // interrupts struck! |
| log.println("TEST thread " + id + ": got interrupt"); |
| re.printStackTrace(log); |
| Throwable e = re.getCause(); |
| assertTrue(e instanceof InterruptedException); |
| if (finish) { |
| break; |
| } |
| } catch (Throwable t) { |
| log.println("thread " + id + " FAILED; unexpected exception"); |
| t.printStackTrace(log); |
| listIndexFiles(log, dir); |
| failed = true; |
| break; |
| } |
| } |
| |
| if (VERBOSE) { |
| log.println("TEST: thread " + id + ": now finish failed=" + failed); |
| } |
| if (!failed) { |
| if (VERBOSE) { |
| log.println("TEST: thread " + id + ": now rollback"); |
| } |
| // clear interrupt state: |
| Thread.interrupted(); |
| if (w != null) { |
| try { |
| w.rollback(); |
| } catch (IOException ioe) { |
| throw new RuntimeException(ioe); |
| } |
| } |
| |
| try { |
| TestUtil.checkIndex(dir); |
| } catch (Exception e) { |
| failed = true; |
| log.println("thread " + id + ": CheckIndex FAILED: unexpected exception"); |
| e.printStackTrace(log); |
| listIndexFiles(log, dir); |
| } |
| try { |
| IndexReader r = DirectoryReader.open(dir); |
| //System.out.println("doc count=" + r.numDocs()); |
| r.close(); |
| } catch (Exception e) { |
| failed = true; |
| log.println("thread " + id + ": DirectoryReader.open FAILED: unexpected exception"); |
| e.printStackTrace(log); |
| listIndexFiles(log, dir); |
| } |
| } |
| try { |
| IOUtils.close(dir); |
| } catch (IOException e) { |
| failed = true; |
| throw new RuntimeException("thread " + id, e); |
| } |
| try { |
| IOUtils.close(adder); |
| } catch (IOException e) { |
| failed = true; |
| throw new RuntimeException("thread " + id, e); |
| } |
| } |
| |
| private void listIndexFiles(PrintStream log, Directory dir) { |
| try { |
| log.println("index files: " + Arrays.toString(dir.listAll())); |
| } catch (IOException ioe) { |
| // Suppress |
log.println("failed to list index files:");
| ioe.printStackTrace(log); |
| } |
| } |
| } |
| |
| public void testThreadInterruptDeadlock() throws Exception { |
| IndexerThreadInterrupt t = new IndexerThreadInterrupt(1); |
| t.setDaemon(true); |
| t.start(); |
| |
| // Force class loader to load ThreadInterruptedException |
| // up front... else we can see a false failure if 2nd |
| // interrupt arrives while class loader is trying to |
| // init this class (in servicing a first interrupt): |
| assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); |
| |
// issue at least 100 interrupts to the child thread
| final int numInterrupts = atLeast(100); |
| int i = 0; |
| while(i < numInterrupts) { |
| // TODO: would be nice to also sometimes interrupt the |
| // CMS merge threads too ... |
| Thread.sleep(10); |
| if (t.allowInterrupt) { |
| i++; |
| t.interrupt(); |
| } |
| if (!t.isAlive()) { |
| break; |
| } |
| } |
| t.finish = true; |
| t.join(); |
| if (t.failed) { |
| fail(t.bytesLog.toString("UTF-8")); |
| } |
| } |
| |
| public void testIndexStoreCombos() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| byte[] b = new byte[50]; |
| for(int i=0;i<50;i++) |
| b[i] = (byte) (i+77); |
| |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(StoredField.TYPE); |
| customType.setTokenized(true); |
| |
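// The binary field stores the 17-byte slice b[10..27); its first byte is
// 10 + 77 == 87, which the assertions after reopening verify.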
| Field f = new Field("binary", b, 10, 17, customType); |
| // TODO: this is evil, changing the type after creating the field: |
| customType.setIndexOptions(IndexOptions.DOCS); |
| final MockTokenizer doc1field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| doc1field1.setReader(new StringReader("doc1field1")); |
| f.setTokenStream(doc1field1); |
| |
| FieldType customType2 = new FieldType(TextField.TYPE_STORED); |
| |
| Field f2 = newField("string", "value", customType2); |
| final MockTokenizer doc1field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| doc1field2.setReader(new StringReader("doc1field2")); |
| f2.setTokenStream(doc1field2); |
| doc.add(f); |
| doc.add(f2); |
| w.addDocument(doc); |
| |
| // add 2 docs to test in-memory merging |
| final MockTokenizer doc2field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| doc2field1.setReader(new StringReader("doc2field1")); |
| f.setTokenStream(doc2field1); |
| final MockTokenizer doc2field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| doc2field2.setReader(new StringReader("doc2field2")); |
| f2.setTokenStream(doc2field2); |
| w.addDocument(doc); |
| |
| // force segment flush so we can force a segment merge with doc3 later. |
| w.commit(); |
| |
| final MockTokenizer doc3field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| doc3field1.setReader(new StringReader("doc3field1")); |
| f.setTokenStream(doc3field1); |
| final MockTokenizer doc3field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false); |
| doc3field2.setReader(new StringReader("doc3field2")); |
| f2.setTokenStream(doc3field2); |
| |
| w.addDocument(doc); |
| w.commit(); |
| w.forceMerge(1); // force segment merge. |
| w.close(); |
| |
| IndexReader ir = DirectoryReader.open(dir); |
| Document doc2 = ir.document(0); |
| IndexableField f3 = doc2.getField("binary"); |
| b = f3.binaryValue().bytes; |
assertNotNull(b);
assertEquals(17, b.length);
assertEquals(87, b[0]);

assertNotNull(ir.document(0).getField("binary").binaryValue());
assertNotNull(ir.document(1).getField("binary").binaryValue());
assertNotNull(ir.document(2).getField("binary").binaryValue());
| |
| assertEquals("value", ir.document(0).get("string")); |
| assertEquals("value", ir.document(1).get("string")); |
assertEquals("value", ir.document(2).get("string"));

| // test that the terms were indexed. |
| assertTrue( |
| TestUtil.docs(random(), ir, "binary", newBytesRef("doc1field1"), null, PostingsEnum.NONE) |
| .nextDoc() |
| != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue( |
| TestUtil.docs(random(), ir, "binary", newBytesRef("doc2field1"), null, PostingsEnum.NONE) |
| .nextDoc() |
| != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue( |
| TestUtil.docs(random(), ir, "binary", newBytesRef("doc3field1"), null, PostingsEnum.NONE) |
| .nextDoc() |
| != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue( |
| TestUtil.docs(random(), ir, "string", newBytesRef("doc1field2"), null, PostingsEnum.NONE) |
| .nextDoc() |
| != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue( |
| TestUtil.docs(random(), ir, "string", newBytesRef("doc2field2"), null, PostingsEnum.NONE) |
| .nextDoc() |
| != DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue( |
| TestUtil.docs(random(), ir, "string", newBytesRef("doc3field2"), null, PostingsEnum.NONE) |
| .nextDoc() |
| != DocIdSetIterator.NO_MORE_DOCS); |
| |
| ir.close(); |
dir.close();
}
| |
| public void testNoDocsIndex() throws Throwable { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.addDocument(new Document()); |
| writer.close(); |
| |
| dir.close(); |
| } |
| |
| |
| public void testDeleteUnusedFiles() throws Exception { |
| assumeFalse("test relies on exact filenames", Codec.getDefault() instanceof SimpleTextCodec); |
| assumeWorkingMMapOnWindows(); |
| |
| for(int iter=0;iter<2;iter++) { |
| // relies on windows semantics |
| Path path = createTempDir(); |
| FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///")); |
| Path indexPath = new FilterPath(path, fs); |
| |
// NOTE: on Unix we cannot use MMapDirectory, because WindowsFS cannot see the file handles it keeps open. Yet on Windows we
// MUST use MMapDirectory, because the OS itself prevents deletion of open files, and the test fails otherwise:
| FSDirectory dir; |
| if (Constants.WINDOWS) { |
| dir = new MMapDirectory(indexPath); |
| } else { |
| dir = new NIOFSDirectory(indexPath); |
| } |
| |
| MergePolicy mergePolicy = newLogMergePolicy(true); |
| |
| // This test expects all of its segments to be in CFS |
| mergePolicy.setNoCFSRatio(1.0); |
| mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); |
| |
| IndexWriter w = new IndexWriter( |
| dir, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(mergePolicy) |
| .setUseCompoundFile(true) |
| ); |
| Document doc = new Document(); |
| doc.add(newTextField("field", "go", Field.Store.NO)); |
| w.addDocument(doc); |
| DirectoryReader r; |
| if (iter == 0) { |
| // use NRT |
| r = w.getReader(); |
| } else { |
| // don't use NRT |
| w.commit(); |
| r = DirectoryReader.open(dir); |
| } |
| |
| assertTrue(Files.exists(indexPath.resolve("_0.cfs"))); |
| assertTrue(Files.exists(indexPath.resolve("_0.cfe"))); |
| assertTrue(Files.exists(indexPath.resolve("_0.si"))); |
| if (iter == 1) { |
| // we run a full commit so there should be a segments file etc. |
| assertTrue(Files.exists(indexPath.resolve("segments_1"))); |
| } else { |
| // this is an NRT reopen - no segments files yet |
| assertFalse(Files.exists(indexPath.resolve("segments_1"))); |
| } |
| w.addDocument(doc); |
| w.forceMerge(1); |
| if (iter == 1) { |
| w.commit(); |
| } |
| IndexReader r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| assertTrue(r != r2); |
| |
| // NOTE: here we rely on "Windows" behavior, ie, even |
| // though IW wanted to delete _0.cfs since it was |
| // merged away, because we have a reader open |
| // against this file, it should still be here: |
| assertTrue(Files.exists(indexPath.resolve("_0.cfs"))); |
| // forceMerge created this |
| //assertTrue(files.contains("_2.cfs")); |
| w.deleteUnusedFiles(); |
| |
| // r still holds this file open |
| assertTrue(Files.exists(indexPath.resolve("_0.cfs"))); |
| //assertTrue(files.contains("_2.cfs")); |
| |
| r.close(); |
| if (iter == 0) { |
| // on closing NRT reader, it calls writer.deleteUnusedFiles |
| assertFalse(Files.exists(indexPath.resolve("_0.cfs"))); |
| } else { |
| // now FSDir can remove it |
| dir.deletePendingFiles(); |
| assertFalse(Files.exists(indexPath.resolve("_0.cfs"))); |
| } |
| |
| w.close(); |
| r2.close(); |
| |
| dir.close(); |
| } |
| } |
| |
| public void testDeleteUnusedFiles2() throws Exception { |
| // Validates that iw.deleteUnusedFiles() also deletes unused index commits |
| // in case a deletion policy which holds onto commits is used. |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()))); |
| SnapshotDeletionPolicy sdp = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); |
| |
| // First commit |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| |
| doc.add(newField("c", "val", customType)); |
| writer.addDocument(doc); |
| writer.commit(); |
| assertEquals(1, DirectoryReader.listCommits(dir).size()); |
| |
| // Keep that commit |
| IndexCommit id = sdp.snapshot(); |
| |
| // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. |
| doc = new Document(); |
| doc.add(newField("c", "val", customType)); |
| writer.addDocument(doc); |
| writer.commit(); |
| assertEquals(2, DirectoryReader.listCommits(dir).size()); |
| |
| // Should delete the unreferenced commit |
| sdp.release(id); |
| writer.deleteUnusedFiles(); |
| assertEquals(1, DirectoryReader.listCommits(dir).size()); |
| |
| writer.close(); |
| dir.close(); |
| } |
| |
| public void testEmptyFSDirWithNoLock() throws Exception { |
| // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), |
| // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed |
| // when listAll() was called in IndexFileDeleter. |
| Directory dir = newFSDirectory(createTempDir("emptyFSDirNoLock"), NoLockFactory.INSTANCE); |
| new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))).close(); |
| dir.close(); |
| } |
| |
| public void testEmptyDirRollback() throws Exception { |
| // TODO: generalize this test |
| assumeFalse("test makes assumptions about file counts", Codec.getDefault() instanceof SimpleTextCodec); |
| // Tests that if IW is created over an empty Directory, some documents are |
| // indexed, flushed (but not committed) and then IW rolls back, then no |
| // files are left in the Directory. |
| Directory dir = newDirectory(); |
| |
| String[] origFiles = dir.listAll(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2) |
| .setMergePolicy(newLogMergePolicy()) |
| .setUseCompoundFile(false)); |
| String[] files = dir.listAll(); |
| |
| // Creating over empty dir should not create any files, |
| // or, at most the write.lock file |
| final int extraFileCount = files.length - origFiles.length; |
| if (extraFileCount == 1) { |
| assertTrue(Arrays.asList(files).contains(IndexWriter.WRITE_LOCK_NAME)); |
| } else { |
| Arrays.sort(origFiles); |
| Arrays.sort(files); |
| assertArrayEquals(origFiles, files); |
| } |
| |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| // create as many files as possible |
| doc.add(newField("c", "val", customType)); |
| writer.addDocument(doc); |
| // Adding just one document does not call flush yet. |
| int computedExtraFileCount = 0; |
| for (String file : dir.listAll()) { |
| if (IndexWriter.WRITE_LOCK_NAME.equals(file) || |
| file.startsWith(IndexFileNames.SEGMENTS) || |
| IndexFileNames.CODEC_FILE_PATTERN.matcher(file).matches()) { |
| if (file.lastIndexOf('.') < 0 |
| // don't count stored fields and term vectors in, or any temporary files they might |
| || !Arrays.asList("fdm", "fdt", "tvm", "tvd", "tmp").contains(file.substring(file.lastIndexOf('.') + 1))) { |
| ++computedExtraFileCount; |
| } |
| } |
| } |
| assertEquals("only the stored and term vector files should exist in the directory", extraFileCount, computedExtraFileCount); |
| |
| doc = new Document(); |
| doc.add(newField("c", "val", customType)); |
| writer.addDocument(doc); |
| |
| // The second document should cause a flush. |
| assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount); |
| |
| // After rollback, IW should remove all files |
| writer.rollback(); |
String[] allFiles = dir.listAll();
assertEquals("no new files should remain in the directory after rollback", origFiles.length + extraFileCount, allFiles.length);
| |
// Since we rolled back above, this close should be a no-op
| writer.close(); |
| allFiles = dir.listAll(); |
| assertEquals("expected a no-op close after IW.rollback()", origFiles.length + extraFileCount, allFiles.length); |
| dir.close(); |
| } |
| |
public void testNoUnwantedTVFiles() throws Exception {
| Directory dir = newDirectory(); |
| IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setRAMBufferSizeMB(0.01) |
| .setMergePolicy(newLogMergePolicy())); |
| indexWriter.getConfig().getMergePolicy().setNoCFSRatio(0.0); |
| |
String BIG = "alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
BIG = BIG + BIG + BIG + BIG;
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setOmitNorms(true); |
| FieldType customType2 = new FieldType(TextField.TYPE_STORED); |
| customType2.setTokenized(false); |
| FieldType customType3 = new FieldType(TextField.TYPE_STORED); |
| customType3.setTokenized(false); |
| customType3.setOmitNorms(true); |
| |
| for (int i=0; i<2; i++) { |
| Document doc = new Document(); |
| doc.add(new Field("id", Integer.toString(i)+BIG, customType3)); |
| doc.add(new Field("str", Integer.toString(i)+BIG, customType2)); |
| doc.add(new Field("str2", Integer.toString(i)+BIG, storedTextType)); |
| doc.add(new Field("str3", Integer.toString(i)+BIG, customType)); |
| indexWriter.addDocument(doc); |
| } |
| |
| indexWriter.close(); |
| |
| TestUtil.checkIndex(dir); |
| |
| assertNoUnreferencedFiles(dir, "no tv files"); |
| DirectoryReader r0 = DirectoryReader.open(dir); |
| for (LeafReaderContext ctx : r0.leaves()) { |
| SegmentReader sr = (SegmentReader) ctx.reader(); |
| assertFalse(sr.getFieldInfos().hasVectors()); |
| } |
| |
| r0.close(); |
| dir.close(); |
| } |
| |
| static final class StringSplitAnalyzer extends Analyzer { |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| return new TokenStreamComponents(new StringSplitTokenizer()); |
| } |
| } |
| |
| private static class StringSplitTokenizer extends Tokenizer { |
| private String[] tokens; |
| private int upto; |
| private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| |
| public StringSplitTokenizer() { |
| super(); |
| } |
| |
| @Override |
| public final boolean incrementToken() { |
| clearAttributes(); |
| if (upto < tokens.length) { |
| termAtt.setEmpty(); |
| termAtt.append(tokens[upto]); |
| upto++; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| super.reset(); |
| this.upto = 0; |
| final StringBuilder b = new StringBuilder(); |
| final char[] buffer = new char[1024]; |
| int n; |
| while ((n = input.read(buffer)) != -1) { |
| b.append(buffer, 0, n); |
| } |
| this.tokens = b.toString().split(" "); |
| } |
| } |
| |
| /** |
| * Make sure we skip wicked long terms. |
| */ |
| public void testWickedLongTerm() throws IOException { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir, new StringSplitAnalyzer()); |
| |
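// IndexWriter.MAX_TERM_LENGTH is the longest term (in UTF-8 bytes) allowed in
// a document; bigTerm is exactly that long, and prefixing "x" below pushes the
// resulting token one byte over the limit.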
| char[] chars = new char[IndexWriter.MAX_TERM_LENGTH]; |
| Arrays.fill(chars, 'x'); |
| Document hugeDoc = new Document(); |
| final String bigTerm = new String(chars); |
| |
// These contents produce a too-long term:
| String contents = "abc xyz x" + bigTerm + " another term"; |
| hugeDoc.add(new TextField("content", contents, Field.Store.NO)); |
| expectThrows(IllegalArgumentException.class, () -> { |
| w.addDocument(hugeDoc); |
| }); |
| |
| // Make sure we can add another normal document |
| Document doc = new Document(); |
| doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO)); |
| w.addDocument(doc); |
| |
| // So we remove the deleted doc: |
| w.forceMerge(1); |
| |
| IndexReader reader = w.getReader(); |
| w.close(); |
| |
| // Make sure all terms < max size were indexed |
| assertEquals(1, reader.docFreq(new Term("content", "abc"))); |
| assertEquals(1, reader.docFreq(new Term("content", "bbb"))); |
| assertEquals(0, reader.docFreq(new Term("content", "term"))); |
| |
| // Make sure the doc that has the massive term is NOT in |
| // the index: |
| assertEquals("document with wicked long term is in the index!", 1, reader.numDocs()); |
| |
| reader.close(); |
| dir.close(); |
| dir = newDirectory(); |
| |
| // Make sure we can add a document with exactly the |
| // maximum length term, and search on that term: |
| doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setTokenized(false); |
| Field contentField = new Field("content", "", customType); |
| doc.add(contentField); |
| |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| iwc.setCodec(TestUtil.getDefaultCodec()); |
| |
| RandomIndexWriter w2 = new RandomIndexWriter(random(), dir, iwc); |
| |
| contentField.setStringValue("other"); |
| w2.addDocument(doc); |
| |
| contentField.setStringValue("term"); |
| w2.addDocument(doc); |
| |
| contentField.setStringValue(bigTerm); |
| w2.addDocument(doc); |
| |
| contentField.setStringValue("zzz"); |
| w2.addDocument(doc); |
| |
| reader = w2.getReader(); |
| w2.close(); |
| assertEquals(1, reader.docFreq(new Term("content", bigTerm))); |
| |
| reader.close(); |
| dir.close(); |
| } |
| |
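| // Repeated add/commit/NRT-open/deleteAll cycles must not leak files in the |
| // directory: |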
| public void testDeleteAllNRTLeftoverFiles() throws Exception { |
| |
| MockDirectoryWrapper d = new MockDirectoryWrapper(random(), new RAMDirectory()); |
| IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| for(int i = 0; i < 20; i++) { |
| for(int j = 0; j < 100; ++j) { |
| w.addDocument(doc); |
| } |
| w.commit(); |
| DirectoryReader.open(w).close(); |
| |
| w.deleteAll(); |
| w.commit(); |
| // Make sure we accumulate no files except for empty |
| // segments_N and segments.gen: |
| assertTrue(d.listAll().length <= 2); |
| } |
| |
| w.close(); |
| d.close(); |
| } |
| |
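| // Every NRT reopen after a change (add or delete) must report a strictly |
| // increasing reader version: |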
| public void testNRTReaderVersion() throws Exception { |
| Directory d = new MockDirectoryWrapper(random(), new RAMDirectory()); |
| IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(newStringField("id", "0", Field.Store.YES)); |
| w.addDocument(doc); |
| DirectoryReader r = w.getReader(); |
| long version = r.getVersion(); |
| r.close(); |
| |
| w.addDocument(doc); |
| r = w.getReader(); |
| long version2 = r.getVersion(); |
| r.close(); |
| assertTrue(version2 > version); |
| |
| w.deleteDocuments(new Term("id", "0")); |
| r = w.getReader(); |
| w.close(); |
| long version3 = r.getVersion(); |
| r.close(); |
| assertTrue(version3 > version2); |
| d.close(); |
| } |
| |
| public void testWhetherDeleteAllDeletesWriteLock() throws Exception { |
| // Must use SimpleFSLockFactory... NativeFSLockFactory |
| // somehow "knows" a lock is held against write.lock |
| // even if you remove that file: |
| Directory d = newFSDirectory(createTempDir("TestIndexWriter.testWhetherDeleteAllDeletesWriteLock"), SimpleFSLockFactory.INSTANCE); |
| RandomIndexWriter w1 = new RandomIndexWriter(random(), d); |
| w1.deleteAll(); |
| expectThrows(LockObtainFailedException.class, () -> { |
| new RandomIndexWriter(random(), d, newIndexWriterConfig(null)); |
| }); |
| |
| w1.close(); |
| d.close(); |
| } |
| |
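| // updateDocuments with a term that matches nothing should degrade to a plain |
| // add and must not throw: |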
| public void testOnlyUpdateDocuments() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, |
| new IndexWriterConfig(new MockAnalyzer(random()))); |
| |
| final List<Document> docs = new ArrayList<>(); |
| docs.add(new Document()); |
| w.updateDocuments(new Term("foo", "bar"), |
| docs); |
| w.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-3872 |
| public void testPrepareCommitThenClose() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, |
| new IndexWriterConfig(new MockAnalyzer(random()))); |
| |
| w.prepareCommit(); |
| expectThrows(IllegalStateException.class, () -> { |
| w.close(); |
| }); |
| w.commit(); |
| w.close(); |
| IndexReader r = DirectoryReader.open(dir); |
| assertEquals(0, r.maxDoc()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-3872 |
| public void testPrepareCommitThenRollback() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, |
| new IndexWriterConfig(new MockAnalyzer(random()))); |
| |
| w.prepareCommit(); |
| w.rollback(); |
| assertFalse(DirectoryReader.indexExists(dir)); |
| dir.close(); |
| } |
| |
| // LUCENE-3872 |
| public void testPrepareCommitThenRollback2() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, |
| new IndexWriterConfig(new MockAnalyzer(random()))); |
| |
| w.commit(); |
| w.addDocument(new Document()); |
| w.prepareCommit(); |
| w.rollback(); |
| assertTrue(DirectoryReader.indexExists(dir)); |
| IndexReader r = DirectoryReader.open(dir); |
| assertEquals(0, r.maxDoc()); |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testDontInvokeAnalyzerForUnAnalyzedFields() throws Exception { |
| Analyzer analyzer = new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| throw new IllegalStateException("don't invoke me!"); |
| } |
| |
| @Override |
| public int getPositionIncrementGap(String fieldName) { |
| throw new IllegalStateException("don't invoke me!"); |
| } |
| |
| @Override |
| public int getOffsetGap(String fieldName) { |
| throw new IllegalStateException("don't invoke me!"); |
| } |
| }; |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer)); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(StringField.TYPE_NOT_STORED); |
| customType.setStoreTermVectors(true); |
| customType.setStoreTermVectorPositions(true); |
| customType.setStoreTermVectorOffsets(true); |
| Field f = newField("field", "abcd", customType); |
| doc.add(f); |
| doc.add(f); |
| Field f2 = newField("field", "", customType); |
| doc.add(f2); |
| doc.add(f); |
| w.addDocument(doc); |
| w.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-1468 -- make sure opening an IndexWriter with |
| // create=true does not remove non-index files |
| |
| public void testOtherFiles() throws Throwable { |
| Directory dir = newDirectory(); |
| IndexWriter iw = new IndexWriter(dir, |
| newIndexWriterConfig(new MockAnalyzer(random()))); |
| iw.addDocument(new Document()); |
| iw.close(); |
| try { |
| // Create my own random file: |
| IndexOutput out = dir.createOutput("myrandomfile", newIOContext(random())); |
| out.writeByte((byte) 42); |
| out.close(); |
| |
| new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))).close(); |
| |
| assertTrue(slowFileExists(dir, "myrandomfile")); |
| } finally { |
| dir.close(); |
| } |
| } |
| |
| // LUCENE-3849 |
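| // A removed stopword must leave a position-increment hole, and a PhraseQuery |
| // built with explicit positions that span the hole must still match: |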
| public void testStopwordsPosIncHole() throws Exception { |
| Directory dir = newDirectory(); |
| Analyzer a = new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer tokenizer = new MockTokenizer(); |
| TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET); |
| return new TokenStreamComponents(tokenizer, stream); |
| } |
| }; |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a); |
| Document doc = new Document(); |
| doc.add(new TextField("body", "just a", Field.Store.NO)); |
| doc.add(new TextField("body", "test of gaps", Field.Store.NO)); |
| iw.addDocument(doc); |
| IndexReader ir = iw.getReader(); |
| iw.close(); |
| IndexSearcher is = newSearcher(ir); |
| PhraseQuery.Builder builder = new PhraseQuery.Builder(); |
| builder.add(new Term("body", "just"), 0); |
| builder.add(new Term("body", "test"), 2); |
| PhraseQuery pq = builder.build(); |
| // body:"just ? test" |
| assertEquals(1, is.search(pq, 5).totalHits.value); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-3849 |
| public void testStopwordsPosIncHole2() throws Exception { |
| // use two stopfilters for testing here |
| Directory dir = newDirectory(); |
| final Automaton secondSet = Automata.makeString("foobar"); |
| Analyzer a = new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer tokenizer = new MockTokenizer(); |
| TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET); |
| stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet)); |
| return new TokenStreamComponents(tokenizer, stream); |
| } |
| }; |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a); |
| Document doc = new Document(); |
| doc.add(new TextField("body", "just a foobar", Field.Store.NO)); |
| doc.add(new TextField("body", "test of gaps", Field.Store.NO)); |
| iw.addDocument(doc); |
| IndexReader ir = iw.getReader(); |
| iw.close(); |
| IndexSearcher is = newSearcher(ir); |
| PhraseQuery.Builder builder = new PhraseQuery.Builder(); |
| builder.add(new Term("body", "just"), 0); |
| builder.add(new Term("body", "test"), 3); |
| PhraseQuery pq = builder.build(); |
| // body:"just ? ? test" |
| assertEquals(1, is.search(pq, 5).totalHits.value); |
| ir.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-4575 |
| public void testCommitWithUserDataOnly() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(null)); |
| writer.commit(); // first commit to complete IW create transaction. |
| |
| // this should store the commit data, even though no other changes were made |
| writer.setLiveCommitData(new HashMap<String,String>() {{ |
| put("key", "value"); |
| }}.entrySet()); |
| writer.commit(); |
| |
| DirectoryReader r = DirectoryReader.open(dir); |
| assertEquals("value", r.getIndexCommit().getUserData().get("key")); |
| r.close(); |
| |
| // now check setCommitData and prepareCommit/commit sequence |
| writer.setLiveCommitData(new HashMap<String,String>() {{ |
| put("key", "value1"); |
| }}.entrySet()); |
| writer.prepareCommit(); |
| writer.setLiveCommitData(new HashMap<String,String>() {{ |
| put("key", "value2"); |
| }}.entrySet()); |
| writer.commit(); // should commit the first commitData only, per protocol |
| |
| r = DirectoryReader.open(dir); |
| assertEquals("value1", r.getIndexCommit().getUserData().get("key")); |
| r.close(); |
| |
| // now should commit the second commitData - there was a bug where |
| // IndexWriter.finishCommit overrode the second commitData |
| writer.commit(); |
| r = DirectoryReader.open(dir); |
| assertEquals("IndexWriter.finishCommit may have overridden the second commitData", |
| "value2", r.getIndexCommit().getUserData().get("key")); |
| r.close(); |
| |
| writer.close(); |
| dir.close(); |
| } |
| |
| private Map<String,String> getLiveCommitData(IndexWriter writer) { |
| Map<String,String> data = new HashMap<>(); |
| Iterable<Map.Entry<String,String>> iter = writer.getLiveCommitData(); |
| if (iter != null) { |
| for(Map.Entry<String,String> ent : iter) { |
| data.put(ent.getKey(), ent.getValue()); |
| } |
| } |
| return data; |
| } |
| |
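| // Live commit data set on one writer must survive close() and remain visible |
| // to a new writer opened in APPEND mode. A minimal usage sketch, equivalent |
| // to the double-brace maps used below (assuming Java 9+ Map.of): |
| // writer.setLiveCommitData(Map.of("key", "value").entrySet()); |
| // writer.getLiveCommitData(); // iterates the same entries back |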
| @Test |
| public void testGetCommitData() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(null)); |
| writer.setLiveCommitData(new HashMap<String,String>() {{ |
| put("key", "value"); |
| }}.entrySet()); |
| assertEquals("value", getLiveCommitData(writer).get("key")); |
| writer.close(); |
| |
| // validate that it's also visible when opening a new IndexWriter |
| writer = new IndexWriter(dir, newIndexWriterConfig(null) |
| .setOpenMode(OpenMode.APPEND)); |
| assertEquals("value", getLiveCommitData(writer).get("key")); |
| writer.close(); |
| |
| dir.close(); |
| } |
| |
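| // A null analyzer is fine for untokenized fields, but adding a tokenized text |
| // field must throw NPE, and previously added docs must stay intact: |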
| public void testNullAnalyzer() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwConf = newIndexWriterConfig(null); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); |
| |
| // add 3 good docs |
| for (int i = 0; i < 3; i++) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); |
| iw.addDocument(doc); |
| } |
| |
| // add broken doc |
| expectThrows(NullPointerException.class, () -> { |
| Document broke = new Document(); |
| broke.add(newTextField("test", "broken", Field.Store.NO)); |
| iw.addDocument(broke); |
| }); |
| |
| // ensure good docs are still ok |
| IndexReader ir = iw.getReader(); |
| assertEquals(3, ir.numDocs()); |
| ir.close(); |
| iw.close(); |
| dir.close(); |
| } |
| |
| public void testNullDocument() throws IOException { |
| Directory dir = newDirectory(); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir); |
| |
| // add 3 good docs |
| for (int i = 0; i < 3; i++) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); |
| iw.addDocument(doc); |
| } |
| |
| // add broken doc |
| expectThrows(NullPointerException.class, () -> { |
| iw.addDocument(null); |
| }); |
| |
| // ensure good docs are still ok |
| IndexReader ir = iw.getReader(); |
| assertEquals(3, ir.numDocs()); |
| |
| ir.close(); |
| iw.close(); |
| dir.close(); |
| } |
| |
| public void testNullDocuments() throws IOException { |
| Directory dir = newDirectory(); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir); |
| |
| // add 3 good docs |
| for (int i = 0; i < 3; i++) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); |
| iw.addDocument(doc); |
| } |
| |
| // add broken doc block |
| expectThrows(NullPointerException.class, () -> { |
| iw.addDocuments(null); |
| }); |
| |
| // ensure good docs are still ok |
| IndexReader ir = iw.getReader(); |
| assertEquals(3, ir.numDocs()); |
| |
| ir.close(); |
| iw.close(); |
| dir.close(); |
| } |
| |
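| // Feed addDocument an Iterable<IndexableField> that blows up at a random |
| // element (see RandomFailingIterable below) and check that exactly the |
| // fully-consumed documents are live afterwards: |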
| public void testIterableFieldThrowsException() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| int iters = atLeast(100); |
| int docCount = 0; |
| int docId = 0; |
| Set<String> liveIds = new HashSet<>(); |
| for (int i = 0; i < iters; i++) { |
| int numDocs = atLeast(4); |
| for (int j = 0; j < numDocs; j++) { |
| String id = Integer.toString(docId++); |
| final List<IndexableField> fields = new ArrayList<>(); |
| fields.add(new StringField("id", id, Field.Store.YES)); |
| fields.add(new StringField("foo", TestUtil.randomSimpleString(random()), Field.Store.NO)); |
| docId++; |
| |
| boolean success = false; |
| try { |
| w.addDocument(new RandomFailingIterable<IndexableField>(fields, random())); |
| success = true; |
| } catch (RuntimeException e) { |
| assertEquals("boom", e.getMessage()); |
| } finally { |
| if (success) { |
| docCount++; |
| liveIds.add(id); |
| } |
| } |
| } |
| } |
| DirectoryReader reader = w.getReader(); |
| assertEquals(docCount, reader.numDocs()); |
| List<LeafReaderContext> leaves = reader.leaves(); |
| for (LeafReaderContext leafReaderContext : leaves) { |
| LeafReader ar = leafReaderContext.reader(); |
| Bits liveDocs = ar.getLiveDocs(); |
| int maxDoc = ar.maxDoc(); |
| for (int i = 0; i < maxDoc; i++) { |
| if (liveDocs == null || liveDocs.get(i)) { |
| assertTrue(liveIds.remove(ar.document(i).get("id"))); |
| } |
| } |
| } |
| assertTrue(liveIds.isEmpty()); |
| w.close(); |
| IOUtils.close(reader, dir); |
| } |
| |
| public void testIterableThrowsException2() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Exception expected = expectThrows(Exception.class, () -> { |
| w.addDocuments(new Iterable<Document>() { |
| @Override |
| public Iterator<Document> iterator() { |
| return new Iterator<Document>() { |
| |
| @Override |
| public boolean hasNext() { |
| return true; |
| } |
| |
| @Override |
| public Document next() { |
| throw new RuntimeException("boom"); |
| } |
| |
| @Override |
| public void remove() { throw new UnsupportedOperationException(); } |
| }; |
| } |
| }); |
| }); |
| assertEquals("boom", expected.getMessage()); |
| |
| w.close(); |
| IOUtils.close(dir); |
| } |
| |
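| // Iterable that throws RuntimeException("boom") once iteration reaches a |
| // randomly chosen index in [0, 5), simulating a failure mid-document: |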
| private static class RandomFailingIterable<T> implements Iterable<T> { |
| private final Iterable<? extends T> list; |
| private final int failOn; |
| |
| public RandomFailingIterable(Iterable<? extends T> list, Random random) { |
| this.list = list; |
| this.failOn = random.nextInt(5); |
| } |
| |
| @Override |
| public Iterator<T> iterator() { |
| final Iterator<? extends T> docIter = list.iterator(); |
| return new Iterator<T>() { |
| int count = 0; |
| |
| @Override |
| public boolean hasNext() { |
| return docIter.hasNext(); |
| } |
| |
| @Override |
| public T next() { |
| if (count == failOn) { |
| throw new RuntimeException("boom"); |
| } |
| count++; |
| return docIter.next(); |
| } |
| |
| @Override |
| public void remove() {throw new UnsupportedOperationException();} |
| }; |
| } |
| } |
| |
| // LUCENE-2727/LUCENE-2812/LUCENE-4738: |
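| // A zero-length pending_segments_0 left by a crashed first commit must not |
| // brick the directory: OpenMode.CREATE must always recover, while APPEND and |
| // CREATE_OR_APPEND may throw because no valid index exists yet. |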
| public void testCorruptFirstCommit() throws Exception { |
| for(int i=0;i<6;i++) { |
| BaseDirectoryWrapper dir = newDirectory(); |
| |
| // Create a corrupt first commit: |
| dir.createOutput(IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, |
| "", |
| 0), IOContext.DEFAULT).close(); |
| |
| IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); |
| int mode = i/2; |
| if (mode == 0) { |
| iwc.setOpenMode(OpenMode.CREATE); |
| } else if (mode == 1) { |
| iwc.setOpenMode(OpenMode.APPEND); |
| } else if (mode == 2) { |
| iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); |
| } |
| |
| if (VERBOSE) { |
| System.out.println("\nTEST: i=" + i); |
| } |
| |
| try { |
| if ((i & 1) == 0) { |
| new IndexWriter(dir, iwc).close(); |
| } else { |
| new IndexWriter(dir, iwc).rollback(); |
| } |
| } catch (IOException ioe) { |
| // OpenMode.APPEND should throw an exception since no |
| // index exists: |
| if (mode == 0) { |
| // Unexpected |
| throw ioe; |
| } |
| } |
| |
| if (VERBOSE) { |
| System.out.println(" at close: " + Arrays.toString(dir.listAll())); |
| } |
| |
| if (mode != 0) { |
| dir.setCheckIndexOnClose(false); |
| } |
| |
| dir.close(); |
| } |
| } |
| |
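| // hasUncommittedChanges must flip true on any add or delete, and only return |
| // to false after commit + waitForMerges + commit makes everything durable: |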
| public void testHasUncommittedChanges() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| assertTrue(writer.hasUncommittedChanges()); // true even with no docs: creating the empty index is itself an uncommitted change |
| Document doc = new Document(); |
| doc.add(newTextField("myfield", "a b c", Field.Store.NO)); |
| writer.addDocument(doc); |
| assertTrue(writer.hasUncommittedChanges()); |
| |
| // Must commit, waitForMerges, commit again, to be |
| // certain that hasUncommittedChanges returns false: |
| writer.commit(); |
| writer.waitForMerges(); |
| writer.commit(); |
| assertFalse(writer.hasUncommittedChanges()); |
| writer.addDocument(doc); |
| assertTrue(writer.hasUncommittedChanges()); |
| writer.commit(); |
| doc = new Document(); |
| doc.add(newStringField("id", "xyz", Field.Store.YES)); |
| writer.addDocument(doc); |
| assertTrue(writer.hasUncommittedChanges()); |
| |
| // Must commit, waitForMerges, commit again, to be |
| // certain that hasUncommittedChanges returns false: |
| writer.commit(); |
| writer.waitForMerges(); |
| writer.commit(); |
| assertFalse(writer.hasUncommittedChanges()); |
| writer.deleteDocuments(new Term("id", "xyz")); |
| assertTrue(writer.hasUncommittedChanges()); |
| |
| // Must commit, waitForMerges, commit again, to be |
| // certain that hasUncommittedChanges returns false: |
| writer.commit(); |
| writer.waitForMerges(); |
| writer.commit(); |
| assertFalse(writer.hasUncommittedChanges()); |
| writer.close(); |
| |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| assertFalse(writer.hasUncommittedChanges()); |
| writer.addDocument(doc); |
| assertTrue(writer.hasUncommittedChanges()); |
| |
| writer.close(); |
| dir.close(); |
| } |
| |
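| // Deleting all docs then force-merging must work even when the merge policy's |
| // keepFullyDeletedSegment answer flips between merge init and merge commit: |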
| public void testMergeAllDeleted() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); |
| AtomicBoolean keepFullyDeletedSegments = new AtomicBoolean(); |
| iwc.setMergePolicy(new FilterMergePolicy(iwc.getMergePolicy()) { |
| @Override |
| public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) throws IOException { |
| return keepFullyDeletedSegments.get(); |
| } |
| }); |
| final SetOnce<IndexWriter> iwRef = new SetOnce<>(); |
| IndexWriter evilWriter = RandomIndexWriter.mockIndexWriter(random(), dir, iwc, new RandomIndexWriter.TestPoint() { |
| @Override |
| public void apply(String message) { |
| if ("startCommitMerge".equals(message)) { |
| keepFullyDeletedSegments.set(false); |
| } else if ("startMergeInit".equals(message)) { |
| keepFullyDeletedSegments.set(true); |
| } |
| } |
| }); |
| iwRef.set(evilWriter); |
| for (int i = 0; i < 1000; i++) { |
| addDoc(evilWriter); |
| if (random().nextInt(17) == 0) { |
| evilWriter.commit(); |
| } |
| } |
| evilWriter.deleteDocuments(new MatchAllDocsQuery()); |
| evilWriter.forceMerge(1); |
| evilWriter.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-5239 |
| public void testDeleteSameTermAcrossFields() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| doc.add(new TextField("a", "foo", Field.Store.NO)); |
| w.addDocument(doc); |
| |
| // Should not delete the document; with LUCENE-5239 the |
| // "foo" from the 2nd delete term would incorrectly |
| // match field a's "foo": |
| w.deleteDocuments(new Term("a", "xxx")); |
| w.deleteDocuments(new Term("b", "foo")); |
| IndexReader r = w.getReader(); |
| w.close(); |
| |
| // Make sure document was not (incorrectly) deleted: |
| assertEquals(1, r.numDocs()); |
| r.close(); |
| dir.close(); |
| } |
| |
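| // A doc rejected with IllegalArgumentException must not leave phantom |
| // uncommitted changes behind after the next commit: |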
| public void testHasUncommittedChangesAfterException() throws IOException { |
| Analyzer analyzer = new MockAnalyzer(random()); |
| |
| Directory directory = newDirectory(); |
| // we don't use RandomIndexWriter because it might add more docvalues than we expect! |
| IndexWriterConfig iwc = newIndexWriterConfig(analyzer); |
| iwc.setMergePolicy(newLogMergePolicy()); |
| IndexWriter iwriter = new IndexWriter(directory, iwc); |
| Document doc = new Document(); |
| doc.add(new SortedDocValuesField("dv", newBytesRef("foo!"))); |
| doc.add(new SortedDocValuesField("dv", newBytesRef("bar!"))); |
| expectThrows( |
| IllegalArgumentException.class, |
| () -> { |
| iwriter.addDocument(doc); |
| }); |
| |
| iwriter.commit(); |
| assertFalse(iwriter.hasUncommittedChanges()); |
| iwriter.close(); |
| directory.close(); |
| } |
| |
| public void testDoubleClose() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(new SortedDocValuesField("dv", newBytesRef("foo!"))); |
| w.addDocument(doc); |
| w.close(); |
| // Close again should have no effect |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testRollbackThenClose() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(new SortedDocValuesField("dv", newBytesRef("foo!"))); |
| w.addDocument(doc); |
| w.rollback(); |
| // Close after rollback should have no effect |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testCloseThenRollback() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(new SortedDocValuesField("dv", newBytesRef("foo!"))); |
| w.addDocument(doc); |
| w.close(); |
| // Rollback after close should have no effect |
| w.rollback(); |
| dir.close(); |
| } |
| |
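| // Closing the writer (a rollback, since commitOnClose=false) while a merge is |
| // blocked mid-flight must not deadlock: |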
| public void testCloseWhileMergeIsRunning() throws IOException { |
| Directory dir = newDirectory(); |
| |
| final CountDownLatch mergeStarted = new CountDownLatch(1); |
| final CountDownLatch closeStarted = new CountDownLatch(1); |
| |
| IndexWriterConfig iwc = newIndexWriterConfig(random(), new MockAnalyzer(random())).setCommitOnClose(false); |
| LogDocMergePolicy mp = new LogDocMergePolicy(); |
| mp.setMergeFactor(2); |
| iwc.setMergePolicy(mp); |
| iwc.setInfoStream(new InfoStream() { |
| @Override |
| public boolean isEnabled(String component) { |
| return true; |
| } |
| |
| @Override |
| public void message(String component, String message) { |
| if (message.equals("rollback")) { |
| closeStarted.countDown(); |
| } |
| } |
| |
| @Override |
| public void close() { |
| } |
| }); |
| |
| iwc.setMergeScheduler(new ConcurrentMergeScheduler() { |
| @Override |
| public void doMerge(MergeSource mergeSource, MergePolicy.OneMerge merge) throws IOException { |
| mergeStarted.countDown(); |
| try { |
| closeStarted.await(); |
| } catch (InterruptedException ie) { |
| Thread.currentThread().interrupt(); |
| throw new RuntimeException(ie); |
| } |
| super.doMerge(mergeSource, merge); |
| } |
| |
| @Override |
| public void close() { |
| } |
| }); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| doc.add(new SortedDocValuesField("dv", newBytesRef("foo!"))); |
| w.addDocument(doc); |
| w.commit(); |
| w.addDocument(doc); |
| w.commit(); |
| w.close(); |
| dir.close(); |
| } |
| |
| /** Make sure that close waits for any still-running commits. */ |
| public void testCloseDuringCommit() throws Exception { |
| |
| final CountDownLatch startCommit = new CountDownLatch(1); |
| final CountDownLatch finishCommit = new CountDownLatch(1); |
| |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(null); |
| // use a test point that makes commit "take a long time": |
| final IndexWriter iw = RandomIndexWriter.mockIndexWriter(random(), dir, iwc, new RandomIndexWriter.TestPoint() { |
| @Override |
| public void apply(String message) { |
| if (message.equals("finishStartCommit")) { |
| startCommit.countDown(); |
| try { |
| Thread.sleep(10); |
| } catch (InterruptedException ie) { |
| throw new ThreadInterruptedException(ie); |
| } |
| } |
| } |
| }); |
| new Thread() { |
| @Override |
| public void run() { |
| try { |
| iw.commit(); |
| finishCommit.countDown(); |
| } catch (IOException ioe) { |
| throw new RuntimeException(ioe); |
| } |
| } |
| }.start(); |
| startCommit.await(); |
| try { |
| iw.close(); |
| } catch (IllegalStateException ise) { |
| // OK, but not required (depends on thread scheduling) |
| } |
| finishCommit.await(); |
| iw.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-5895: |
| |
| /** Make sure we see ids per segment and per commit. */ |
| public void testIds() throws Exception { |
| Directory d = newDirectory(); |
| IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); |
| w.addDocument(new Document()); |
| w.close(); |
| |
| SegmentInfos sis = SegmentInfos.readLatestCommit(d); |
| byte[] id1 = sis.getId(); |
| assertNotNull(id1); |
| assertEquals(StringHelper.ID_LENGTH, id1.length); |
| |
| byte[] id2 = sis.info(0).info.getId(); |
| byte[] sciId2 = sis.info(0).getId(); |
| assertNotNull(id2); |
| assertNotNull(sciId2); |
| assertEquals(StringHelper.ID_LENGTH, id2.length); |
| assertEquals(StringHelper.ID_LENGTH, sciId2.length); |
| |
| // Make sure CheckIndex includes id output: |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); |
| CheckIndex checker = new CheckIndex(d); |
| checker.setDoSlowChecks(false); |
| checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false); |
| CheckIndex.Status indexStatus = checker.checkIndex(null); |
| String s = bos.toString(IOUtils.UTF_8); |
| checker.close(); |
| // Make sure CheckIndex didn't fail |
| assertTrue(s, indexStatus != null && indexStatus.clean); |
| |
| // Commit id is always stored: |
| assertTrue("missing id=" + StringHelper.idToString(id1) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id1))); |
| |
| d.close(); |
| |
| Set<String> ids = new HashSet<>(); |
| for(int i=0;i<100000;i++) { |
| String id = StringHelper.idToString(StringHelper.randomId()); |
| assertFalse("id=" + id + " i=" + i, ids.contains(id)); |
| ids.add(id); |
| } |
| } |
| |
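| // A text field indexed from an empty token stream still records a norm |
| // (with value 0) for its document: |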
| public void testEmptyNorm() throws Exception { |
| Directory d = newDirectory(); |
| IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(new TextField("foo", new CannedTokenStream())); |
| w.addDocument(doc); |
| w.commit(); |
| w.close(); |
| DirectoryReader r = DirectoryReader.open(d); |
| NumericDocValues norms = getOnlyLeafReader(r).getNormValues("foo"); |
| assertEquals(0, norms.nextDoc()); |
| assertEquals(0, norms.longValue()); |
| r.close(); |
| d.close(); |
| } |
| |
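| // One hundred threads adding one doc each, strictly one at a time, should |
| // still end up buffered together and flushed as a single segment: |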
| public void testManySeparateThreads() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| iwc.setMaxBufferedDocs(1000); |
| final IndexWriter w = new IndexWriter(dir, iwc); |
| // Index 100 docs, each from a new thread, but always only 1 thread is in IW at once: |
| for(int i=0;i<100;i++) { |
| Thread thread = new Thread() { |
| @Override |
| public void run() { |
| Document doc = new Document(); |
| doc.add(newStringField("foo", "bar", Field.Store.NO)); |
| try { |
| w.addDocument(doc); |
| } catch (IOException ioe) { |
| throw new RuntimeException(ioe); |
| } |
| } |
| }; |
| thread.start(); |
| thread.join(); |
| } |
| w.close(); |
| |
| IndexReader r = DirectoryReader.open(dir); |
| assertEquals(1, r.leaves().size()); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-6505 |
| public void testNRTSegmentsFile() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| // creates segments_1 |
| w.commit(); |
| |
| // newly opened NRT reader should see gen=1 segments file |
| DirectoryReader r = DirectoryReader.open(w); |
| assertEquals(1, r.getIndexCommit().getGeneration()); |
| assertEquals("segments_1", r.getIndexCommit().getSegmentsFileName()); |
| |
| // newly opened non-NRT reader should see gen=1 segments file |
| DirectoryReader r2 = DirectoryReader.open(dir); |
| assertEquals(1, r2.getIndexCommit().getGeneration()); |
| assertEquals("segments_1", r2.getIndexCommit().getSegmentsFileName()); |
| r2.close(); |
| |
| // make a change and another commit |
| w.addDocument(new Document()); |
| w.commit(); |
| DirectoryReader r3 = DirectoryReader.openIfChanged(r); |
| r.close(); |
| assertNotNull(r3); |
| |
| // reopened NRT reader should see gen=2 segments file |
| assertEquals(2, r3.getIndexCommit().getGeneration()); |
| assertEquals("segments_2", r3.getIndexCommit().getSegmentsFileName()); |
| r3.close(); |
| |
| // newly opened non-NRT reader should see gen=2 segments file |
| DirectoryReader r4 = DirectoryReader.open(dir); |
| assertEquals(2, r4.getIndexCommit().getGeneration()); |
| assertEquals("segments_2", r4.getIndexCommit().getSegmentsFileName()); |
| r4.close(); |
| |
| w.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-6505 |
| public void testNRTAfterCommit() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.commit(); |
| |
| w.addDocument(new Document()); |
| DirectoryReader r = DirectoryReader.open(w); |
| w.commit(); |
| |
| // commit even with no other changes counts as a "change" that NRT reader reopen will see: |
| DirectoryReader r2 = DirectoryReader.open(dir); |
| assertNotNull(r2); |
| assertEquals(2, r2.getIndexCommit().getGeneration()); |
| assertEquals("segments_2", r2.getIndexCommit().getSegmentsFileName()); |
| |
| IOUtils.close(r, r2, w, dir); |
| } |
| |
| // LUCENE-6505 |
| public void testNRTAfterSetUserDataWithoutCommit() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.commit(); |
| |
| DirectoryReader r = DirectoryReader.open(w); |
| Map<String,String> m = new HashMap<>(); |
| m.put("foo", "bar"); |
| w.setLiveCommitData(m.entrySet()); |
| |
| // setLiveCommitData with no other changes should count as an NRT change: |
| DirectoryReader r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| |
| IOUtils.close(r2, r, w, dir); |
| } |
| |
| // LUCENE-6505 |
| public void testNRTAfterSetUserDataWithCommit() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.commit(); |
| |
| DirectoryReader r = DirectoryReader.open(w); |
| Map<String,String> m = new HashMap<>(); |
| m.put("foo", "bar"); |
| w.setLiveCommitData(m.entrySet()); |
| w.commit(); |
| // setLiveCommitData and also commit, with no other changes, should count as an NRT change: |
| DirectoryReader r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| IOUtils.close(r, r2, w, dir); |
| } |
| |
| // LUCENE-6523 |
| public void testCommitImmediatelyAfterNRTReopen() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.commit(); |
| |
| w.addDocument(new Document()); |
| |
| DirectoryReader r = DirectoryReader.open(w); |
| w.commit(); |
| |
| assertFalse(r.isCurrent()); |
| |
| DirectoryReader r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| // segments_N should have changed: |
| assertFalse(r2.getIndexCommit().getSegmentsFileName().equals(r.getIndexCommit().getSegmentsFileName())); |
| IOUtils.close(r, r2, w, dir); |
| } |
| |
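| // Under Windows-like "can't delete open files" semantics, rolling back a |
| // prepareCommit and reopening the writer must never reuse a file name that |
| // is still pending deletion: |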
| public void testPendingDeleteDVGeneration() throws IOException { |
| // irony: currently we don't emulate windows well enough to work on windows! |
| assumeFalse("windows is not supported", Constants.WINDOWS); |
| |
| Path path = createTempDir(); |
| |
| // Use WindowsFS to prevent open files from being deleted: |
| FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///")); |
| Path root = new FilterPath(path, fs); |
| |
| // MMapDirectory doesn't work because it closes its file handles after mapping! |
| List<Closeable> toClose = new ArrayList<>(); |
| try (FSDirectory dir = new SimpleFSDirectory(root); |
| Closeable closeable = () -> IOUtils.close(toClose)) { |
| assert closeable != null; |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())) |
| .setUseCompoundFile(false) |
| .setMergePolicy(NoMergePolicy.INSTANCE) // avoid merging away the randomFile |
| .setMaxBufferedDocs(2) |
| .setRAMBufferSizeMB(-1); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| d.add(new NumericDocValuesField("id", 1)); |
| w.addDocument(d); |
| d = new Document(); |
| d.add(new StringField("id", "2", Field.Store.YES)); |
| d.add(new NumericDocValuesField("id", 2)); |
| w.addDocument(d); |
| w.flush(); |
| d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| d.add(new NumericDocValuesField("id", 1)); |
| w.updateDocument(new Term("id", "1"), d); |
| w.commit(); |
| Set<String> files = new HashSet<>(Arrays.asList(dir.listAll())); |
| int numIters = 10 + random().nextInt(50); |
| for (int i = 0; i < numIters; i++) { |
| if (random().nextBoolean()) { |
| d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| d.add(new NumericDocValuesField("id", 1)); |
| w.updateDocument(new Term("id", "1"), d); |
| } else if (random().nextBoolean()) { |
| w.deleteDocuments(new Term("id", "2")); |
| } else { |
| w.updateNumericDocValue(new Term("id", "1"), "id", 2); |
| } |
| w.prepareCommit(); |
| List<String> newFiles = new ArrayList<>(Arrays.asList(dir.listAll())); |
| newFiles.removeAll(files); |
| String randomFile = RandomPicks.randomFrom(random(), newFiles); |
| toClose.add(dir.openInput(randomFile, IOContext.DEFAULT)); |
| w.rollback(); |
| iwc = new IndexWriterConfig(new MockAnalyzer(random())) |
| .setUseCompoundFile(false) |
| .setMergePolicy(NoMergePolicy.INSTANCE) |
| .setMaxBufferedDocs(2) |
| .setRAMBufferSizeMB(-1); |
| w = new IndexWriter(dir, iwc); |
| expectThrows(NoSuchFileException.class, () -> { |
| dir.deleteFile(randomFile); |
| }); |
| } |
| w.close(); |
| } |
| |
| } |
| |
| public void testPendingDeletionsRollbackWithReader() throws IOException { |
| // irony: currently we don't emulate windows well enough to work on windows! |
| assumeFalse("windows is not supported", Constants.WINDOWS); |
| |
| Path path = createTempDir(); |
| |
| // Use WindowsFS to prevent open files from being deleted: |
| FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///")); |
| Path root = new FilterPath(path, fs); |
| try (FSDirectory _dir = new SimpleFSDirectory(root)) { |
| Directory dir = new FilterDirectory(_dir) {}; |
| |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| d.add(new NumericDocValuesField("numval", 1)); |
| w.addDocument(d); |
| w.commit(); |
| w.addDocument(d); |
| w.flush(); |
| DirectoryReader reader = DirectoryReader.open(w); |
| w.rollback(); |
| |
| // opening and closing a new writer tries to delete the superfluous files (some deletes will fail under the Windows-like fs while the reader holds them open) |
| IndexWriterConfig iwc2 = new IndexWriterConfig(new MockAnalyzer(random())); |
| new IndexWriter(dir, iwc2).close(); |
| |
| // test that we can index on top of pending deletions |
| IndexWriterConfig iwc3 = new IndexWriterConfig(new MockAnalyzer(random())); |
| w = new IndexWriter(dir, iwc3); |
| w.addDocument(d); |
| w.commit(); |
| |
| reader.close(); |
| w.close(); |
| } |
| } |
| |
| public void testWithPendingDeletions() throws Exception { |
| // irony: currently we don't emulate windows well enough to work on windows! |
| assumeFalse("windows is not supported", Constants.WINDOWS); |
| |
| Path path = createTempDir(); |
| |
| // Use WindowsFS to prevent open files from being deleted: |
| FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///")); |
| Path root = new FilterPath(path, fs); |
| IndexCommit indexCommit; |
| DirectoryReader reader; |
| // MMapDirectory doesn't work because it closes its file handles after mapping! |
| try (FSDirectory dir = new SimpleFSDirectory(root)) { |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.commit(); |
| reader = w.getReader(); |
| // keep this commit so we can later open a writer on it while file deletes are still pending |
| indexCommit = reader.getIndexCommit(); |
| w.close(); |
| w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); |
| w.addDocument(new Document()); |
| w.close(); |
| IndexInput in = dir.openInput("segments_2", IOContext.DEFAULT); |
| dir.deleteFile("segments_2"); |
| assertTrue(dir.getPendingDeletions().size() > 0); |
| |
| // make sure we get NoSuchFileException if we try to delete an already-pending-delete file: |
| expectThrows(NoSuchFileException.class, () -> { |
| dir.deleteFile("segments_2"); |
| }); |
| |
| try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(indexCommit))) { |
| writer.addDocument(new Document()); |
| writer.commit(); |
| assertEquals(1, writer.getDocStats().maxDoc); |
| // now check that we moved to 3 |
| dir.openInput("segments_3", IOContext.READ).close(); |
| } |
| reader.close(); |
| in.close(); |
| } |
| } |
| |
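| // Files the writer wants to delete but that are still held open must show up |
| // in getPendingDeletions(), and a new writer must open cleanly despite them: |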
| public void testPendingDeletesAlreadyWrittenFiles() throws IOException { |
| Path path = createTempDir(); |
| // irony: currently we don't emulate windows well enough to work on windows! |
| assumeFalse("windows is not supported", Constants.WINDOWS); |
| |
| // Use WindowsFS to prevent open files from being deleted: |
| FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///")); |
| Path root = new FilterPath(path, fs); |
| DirectoryReader reader; |
| // MMapDirectory doesn't work because it closes its file handles after mapping! |
| try (FSDirectory dir = new SimpleFSDirectory(root)) { |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.commit(); |
| IndexInput in = dir.openInput("segments_1", IOContext.DEFAULT); |
| w.addDocument(new Document()); |
| w.close(); |
| |
| assertTrue(dir.getPendingDeletions().size() > 0); |
| |
| // make sure we get NoSuchFileException if we try to delete an already-pending-delete file: |
| expectThrows(NoSuchFileException.class, () -> { |
| dir.deleteFile("segments_1"); |
| }); |
| new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))).close(); |
| in.close(); |
| } |
| } |
| |
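| // A leftover temp file (here a fake "_0 ... bkd" temp output) must be deleted |
| // by the next IndexWriter that opens the directory: |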
| public void testLeftoverTempFiles() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| w.close(); |
| |
| IndexOutput out = dir.createTempOutput("_0", "bkd", IOContext.DEFAULT); |
| String tempName = out.getName(); |
| out.close(); |
| iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| w = new IndexWriter(dir, iwc); |
| |
| // Make sure IW deleted the unref'd file: |
| try { |
| dir.openInput(tempName, IOContext.DEFAULT); |
| fail("did not hit exception"); |
| } catch (FileNotFoundException | NoSuchFileException e) { |
| // expected |
| } |
| w.close(); |
| dir.close(); |
| } |
| |
| @Ignore("requires running tests with biggish heap") |
| public void testMassiveField() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); |
| final IndexWriter w = new IndexWriter(dir, iwc); |
| |
| StringBuilder b = new StringBuilder(); |
| while (b.length() <= IndexWriter.MAX_STORED_STRING_LENGTH) { |
| b.append("x "); |
| } |
| |
| final Document doc = new Document(); |
| //doc.add(new TextField("big", b.toString(), Field.Store.YES)); |
| doc.add(new StoredField("big", b.toString())); |
| Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);}); |
| assertEquals("stored field \"big\" is too large (" + b.length() + " characters) to store", e.getMessage()); |
| |
| // make sure writer is still usable: |
| Document doc2 = new Document(); |
| doc2.add(new StringField("id", "foo", Field.Store.YES)); |
| w.addDocument(doc2); |
| |
| DirectoryReader r = DirectoryReader.open(w); |
| assertEquals(1, r.numDocs()); |
| r.close(); |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testRecordsIndexCreatedVersion() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); |
| w.commit(); |
| w.close(); |
| assertEquals(Version.LATEST.major, SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor()); |
| dir.close(); |
| } |
| |
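| // flushNextBuffer should pick the largest non-pending DWPT, move exactly its |
| // buffered docs out of RAM, and leave the DWPT unlocked afterwards: |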
| public void testFlushLargestWriter() throws IOException, InterruptedException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); |
| int numDocs = indexDocsForMultipleDWPTs(w); |
| DocumentsWriterPerThread largestNonPendingWriter |
| = w.docWriter.flushControl.findLargestNonPendingWriter(); |
| assertFalse(largestNonPendingWriter.isFlushPending()); |
| |
| int numRamDocs = w.numRamDocs(); |
| int numDocsInDWPT = largestNonPendingWriter.getNumDocsInRAM(); |
| assertTrue(w.flushNextBuffer()); |
| assertTrue(largestNonPendingWriter.hasFlushed()); |
| assertEquals(numRamDocs-numDocsInDWPT, w.numRamDocs()); |
| |
| // make sure it's not locked |
| largestNonPendingWriter.lock(); |
| largestNonPendingWriter.unlock(); |
| if (random().nextBoolean()) { |
| w.commit(); |
| } |
| DirectoryReader reader = DirectoryReader.open(w, true, true); |
| assertEquals(numDocs, reader.numDocs()); |
| reader.close(); |
| w.close(); |
| dir.close(); |
| } |
| |
| private int indexDocsForMultipleDWPTs(IndexWriter w) throws InterruptedException { |
| Thread[] threads = new Thread[3]; |
| CountDownLatch latch = new CountDownLatch(threads.length); |
| int numDocsPerThread = 10 + random().nextInt(30); |
| // ensure we have more than one thread state |
| for (int i = 0; i < threads.length; i++) { |
| threads[i] = new Thread(() -> { |
| latch.countDown(); |
| try { |
| latch.await(); |
| for (int j = 0; j < numDocsPerThread; j++) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", "foo", Field.Store.YES)); |
| w.addDocument(doc); |
| } |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } |
| }); |
| threads[i].start(); |
| } |
| for (Thread t : threads) { |
| t.join(); |
| } |
| return numDocsPerThread * threads.length; |
| } |
| |
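| // During a full flush (and after an aborted one), checkoutLargestNonPendingWriter |
| // must return null instead of stealing a DWPT from the full-flush queue: |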
| public void testNeverCheckOutOnFullFlush() throws IOException, InterruptedException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); |
| indexDocsForMultipleDWPTs(w); |
| DocumentsWriterPerThread largestNonPendingWriter |
| = w.docWriter.flushControl.findLargestNonPendingWriter(); |
| assertFalse(largestNonPendingWriter.isFlushPending()); |
| assertFalse(largestNonPendingWriter.hasFlushed()); |
| int threadPoolSize = w.docWriter.perThreadPool.size(); |
| w.docWriter.flushControl.markForFullFlush(); |
| DocumentsWriterPerThread documentsWriterPerThread = w.docWriter.flushControl.checkoutLargestNonPendingWriter(); |
| assertNull(documentsWriterPerThread); |
| assertEquals(threadPoolSize, w.docWriter.flushControl.numQueuedFlushes()); |
| w.docWriter.flushControl.abortFullFlushes(); |
| assertNull("was aborted", w.docWriter.flushControl.checkoutLargestNonPendingWriter()); |
| assertEquals(0, w.docWriter.flushControl.numQueuedFlushes()); |
| w.close(); |
| dir.close(); |
| } |
| |
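| // flushNextBuffer must wait on the DWPT's own lock, never while holding the |
| // flush-control monitor, even when another thread pins the largest writer: |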
| public void testHoldLockOnLargestWriter() throws IOException, InterruptedException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); |
| int numDocs = indexDocsForMultipleDWPTs(w); |
| DocumentsWriterPerThread largestNonPendingWriter |
| = w.docWriter.flushControl.findLargestNonPendingWriter(); |
| assertFalse(largestNonPendingWriter.isFlushPending()); |
| assertFalse(largestNonPendingWriter.hasFlushed()); |
| |
| CountDownLatch wait = new CountDownLatch(1); |
| CountDownLatch locked = new CountDownLatch(1); |
| Thread lockThread = new Thread(() -> { |
| try { |
| largestNonPendingWriter.lock(); |
| locked.countDown(); |
| wait.await(); |
| } catch (InterruptedException e) { |
| throw new AssertionError(e); |
| } finally { |
| largestNonPendingWriter.unlock(); |
| } |
| }); |
| lockThread.start(); |
| Thread flushThread = new Thread(() -> { |
| try { |
| locked.await(); |
| assertTrue(w.flushNextBuffer()); |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } |
| }); |
| flushThread.start(); |
| |
| locked.await(); |
| // access a synced method to ensure we never lock while we hold the flush control monitor |
| w.docWriter.flushControl.activeBytes(); |
| wait.countDown(); |
| lockThread.join(); |
| flushThread.join(); |
| |
| assertTrue("largest DWPT should be flushed", largestNonPendingWriter.hasFlushed()); |
| // make sure it's not locked |
| largestNonPendingWriter.lock(); |
| largestNonPendingWriter.unlock(); |
| if (random().nextBoolean()) { |
| w.commit(); |
| } |
| DirectoryReader reader = DirectoryReader.open(w, true, true); |
| assertEquals(numDocs, reader.numDocs()); |
| reader.close(); |
| w.close(); |
| dir.close(); |
| } |
| |
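| // With checkPendingFlushUpdate disabled only the committing thread flushes |
| // segments; once re-enabled, the indexing threads should start picking up |
| // pending flushes themselves: |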
| public void testCheckPendingFlushPostUpdate() throws IOException, InterruptedException { |
| MockDirectoryWrapper dir = newMockDirectory(); |
| Set<String> flushingThreads = Collections.synchronizedSet(new HashSet<>()); |
| dir.failOn(new MockDirectoryWrapper.Failure() { |
| @Override |
| public void eval(MockDirectoryWrapper dir) throws IOException { |
| if (callStackContains(DocumentsWriterPerThread.class, "flush")) { |
| flushingThreads.add(Thread.currentThread().getName()); |
| } |
| } |
| }); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig() |
| .setCheckPendingFlushUpdate(false) |
| .setMaxBufferedDocs(Integer.MAX_VALUE) |
| .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)); |
| AtomicBoolean done = new AtomicBoolean(false); |
| int numThreads = 2 + random().nextInt(3); |
| CountDownLatch latch = new CountDownLatch(numThreads); |
| Set<String> indexingThreads = new HashSet<>(); |
| Thread[] threads = new Thread[numThreads]; |
| for (int i = 0; i < numThreads; i++) { |
| threads[i] = new Thread(() -> { |
| latch.countDown(); |
| int numDocs = 0; |
| while (done.get() == false) { |
| |
| Document doc = new Document(); |
| doc.add(new StringField("id", "foo", Field.Store.YES)); |
| try { |
| w.addDocument(doc); |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } |
| if (numDocs++ % 10 == 0) { |
| Thread.yield(); |
| } |
| } |
| }); |
| indexingThreads.add(threads[i].getName()); |
| threads[i].start(); |
| } |
| latch.await(); |
| try { |
| int numIters = rarely() ? 1 + random().nextInt(5) : 1; |
| for (int i = 0; i < numIters; i++) { |
| waitForDocsInBuffers(w, Math.min(2, threads.length)); |
| w.commit(); |
| assertTrue(flushingThreads.toString(), flushingThreads.contains(Thread.currentThread().getName())); |
| flushingThreads.retainAll(indexingThreads); |
| assertTrue(flushingThreads.toString(), flushingThreads.isEmpty()); |
| } |
| w.getConfig().setCheckPendingFlushUpdate(true); |
| numIters = 0; |
| while (true) { |
| assertFalse("should finish in less than 100 iterations", numIters++ >= 100); |
| waitForDocsInBuffers(w, Math.min(2, threads.length)); |
| w.flush(); |
| flushingThreads.retainAll(indexingThreads); |
| if (flushingThreads.isEmpty() == false) { |
| break; |
| } |
| } |
| } finally { |
| done.set(true); |
| for (int i = 0; i < numThreads; i++) { |
| threads[i].join(); |
| } |
| IOUtils.close(w, dir); |
| } |
| } |
| |
| private static void waitForDocsInBuffers(IndexWriter w, int buffersWithDocs) { |
| // wait until at least N DWPTs have a doc in order to observe |
| // who flushes the segments. |
| while(true) { |
| int numStatesWithDocs = 0; |
| DocumentsWriterPerThreadPool perThreadPool = w.docWriter.perThreadPool; |
| for (DocumentsWriterPerThread dwpt : perThreadPool) { |
| dwpt.lock(); |
| try { |
| if (dwpt.getNumDocsInRAM() > 1) { |
| numStatesWithDocs++; |
| } |
| } finally { |
| dwpt.unlock(); |
| } |
| } |
| if (numStatesWithDocs >= buffersWithDocs) { |
| return; |
| } |
| } |
| } |
| |
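| // softUpdateDocument(s) must reject null terms and missing soft-delete fields, |
| // then hide older versions via the soft_delete docvalue instead of hard |
| // deletes. A minimal usage sketch (mirroring the calls below; 'doc' is the |
| // replacement version of the document): |
| // writer.softUpdateDocument(new Term("id", "1"), doc, |
| // new NumericDocValuesField("soft_delete", 1)); |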
| public void testSoftUpdateDocuments() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig() |
| .setMergePolicy(NoMergePolicy.INSTANCE) |
| .setSoftDeletesField("soft_delete")); |
| expectThrows(IllegalArgumentException.class, () -> { |
| writer.softUpdateDocument(null, new Document(), new NumericDocValuesField("soft_delete", 1)); |
| }); |
| |
| expectThrows(IllegalArgumentException.class, () -> { |
| writer.softUpdateDocument(new Term("id", "1"), new Document()); |
| }); |
| |
| expectThrows(IllegalArgumentException.class, () -> { |
| writer.softUpdateDocuments(null, Arrays.asList(new Document()), new NumericDocValuesField("soft_delete", 1)); |
| }); |
| |
| expectThrows(IllegalArgumentException.class, () -> { |
| writer.softUpdateDocuments(new Term("id", "1"), Arrays.asList(new Document())); |
| }); |
| |
| Document doc = new Document(); |
| doc.add(new StringField("id", "1", Field.Store.YES)); |
| doc.add(new StringField("version", "1", Field.Store.YES)); |
| writer.addDocument(doc); |
| doc = new Document(); |
| doc.add(new StringField("id", "1", Field.Store.YES)); |
| doc.add(new StringField("version", "2", Field.Store.YES)); |
| Field field = new NumericDocValuesField("soft_delete", 1); |
| writer.softUpdateDocument(new Term("id", "1"), doc, field); |
| DirectoryReader reader = DirectoryReader.open(writer); |
| assertEquals(2, reader.docFreq(new Term("id", "1"))); |
| IndexSearcher searcher = new IndexSearcher(reader); |
| TopDocs topDocs = searcher.search(new TermQuery(new Term("id", "1")), 10); |
| assertEquals(1, topDocs.totalHits.value); |
| Document document = reader.document(topDocs.scoreDocs[0].doc); |
| assertEquals("2", document.get("version")); |
| |
| // update the on-disk version |
| doc = new Document(); |
| doc.add(new StringField("id", "1", Field.Store.YES)); |
| doc.add(new StringField("version", "3", Field.Store.YES)); |
| field = new NumericDocValuesField("soft_delete", 1); |
| writer.softUpdateDocument(new Term("id", "1"), doc, field); |
| DirectoryReader oldReader = reader; |
| reader = DirectoryReader.openIfChanged(reader, writer); |
| assertNotSame(reader, oldReader); |
| oldReader.close(); |
| searcher = new IndexSearcher(reader); |
| topDocs = searcher.search(new TermQuery(new Term("id", "1")), 10); |
| assertEquals(1, topDocs.totalHits.value); |
| document = reader.document(topDocs.scoreDocs[0].doc); |
| assertEquals("3", document.get("version")); |
| |
| // now delete it |
| writer.updateDocValues(new Term("id", "1"), field); |
| oldReader = reader; |
| reader = DirectoryReader.openIfChanged(reader, writer); |
| assertNotSame(reader, oldReader); |
| assertNotNull(reader); |
| oldReader.close(); |
| searcher = new IndexSearcher(reader); |
| topDocs = searcher.search(new TermQuery(new Term("id", "1")), 10); |
| assertEquals(0, topDocs.totalHits.value); |
| int numSoftDeleted = 0; |
| for (SegmentCommitInfo info : writer.cloneSegmentInfos()) { |
| numSoftDeleted += info.getSoftDelCount(); |
| } |
| IndexWriter.DocStats docStats = writer.getDocStats(); |
| assertEquals(docStats.maxDoc - docStats.numDocs, numSoftDeleted); |
| for (LeafReaderContext context : reader.leaves()) { |
| LeafReader leaf = context.reader(); |
| assertNull(((SegmentReader) leaf).getHardLiveDocs()); |
| } |
| writer.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testSoftUpdatesConcurrently() throws IOException, InterruptedException { |
| softUpdatesConcurrently(false); |
| } |
| |
| public void testSoftUpdatesConcurrentlyMixedDeletes() throws IOException, InterruptedException { |
| softUpdatesConcurrently(true); |
| } |
| |
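| // Hammer (soft) updates from several threads; in pure soft-delete mode a |
| // wrapping merge policy keeps soft-deleted docs alive until |
| // mergeAwaySoftDeletes flips to true, after which forceMerge prunes them: |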
| public void softUpdatesConcurrently(boolean mixDeletes) throws IOException, InterruptedException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); |
| indexWriterConfig.setSoftDeletesField("soft_delete"); |
| AtomicBoolean mergeAwaySoftDeletes = new AtomicBoolean(random().nextBoolean()); |
| if (mixDeletes == false) { |
| indexWriterConfig.setMergePolicy(new OneMergeWrappingMergePolicy(indexWriterConfig.getMergePolicy(), towrap -> |
| new MergePolicy.OneMerge(towrap.segments) { |
| @Override |
| public CodecReader wrapForMerge(CodecReader reader) throws IOException { |
| if (mergeAwaySoftDeletes.get()) { |
| return towrap.wrapForMerge(reader); |
| } else { |
| CodecReader wrapped = towrap.wrapForMerge(reader); |
| return new FilterCodecReader(wrapped) { |
| @Override |
| public CacheHelper getCoreCacheHelper() { |
| return in.getCoreCacheHelper(); |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return in.getReaderCacheHelper(); |
| } |
| |
| @Override |
| public Bits getLiveDocs() { |
| return null; // everything is live |
| } |
| |
| @Override |
| public int numDocs() { |
| return maxDoc(); |
| } |
| }; |
| } |
| } |
| } |
| ) { |
| @Override |
| public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier<CodecReader> readerSupplier) throws IOException { |
| if (mergeAwaySoftDeletes.get()) { |
| return super.numDeletesToMerge(info, delCount, readerSupplier); |
| } else { |
| return 0; |
| } |
| } |
| }); |
| } |
| IndexWriter writer = new IndexWriter(dir, indexWriterConfig); |
| Thread[] threads = new Thread[2 + random().nextInt(3)]; |
| CountDownLatch startLatch = new CountDownLatch(1); |
| CountDownLatch started = new CountDownLatch(threads.length); |
| boolean updateSeveralDocs = random().nextBoolean(); |
| Set<String> ids = Collections.synchronizedSet(new HashSet<>()); |
| for (int i = 0; i < threads.length; i++) { |
| threads[i] = new Thread(() -> { |
| try { |
| started.countDown(); |
| startLatch.await(); |
| for (int d = 0; d < 100; d++) { |
| String id = String.valueOf(random().nextInt(10)); |
| if (updateSeveralDocs) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", id, Field.Store.YES)); |
| if (mixDeletes && random().nextBoolean()) { |
| writer.updateDocuments(new Term("id", id), Arrays.asList(doc, doc)); |
| } else { |
| writer.softUpdateDocuments(new Term("id", id), Arrays.asList(doc, doc), |
| new NumericDocValuesField("soft_delete", 1)); |
| } |
| } else { |
| Document doc = new Document(); |
| doc.add(new StringField("id", id, Field.Store.YES)); |
| if (mixDeletes && random().nextBoolean()) { |
| writer.updateDocument(new Term("id", id), doc); |
| } else { |
| writer.softUpdateDocument(new Term("id", id), doc, |
| new NumericDocValuesField("soft_delete", 1)); |
| } |
| } |
| ids.add(id); |
| } |
| } catch (IOException | InterruptedException e) { |
| throw new AssertionError(e); |
| } |
| }); |
| threads[i].start(); |
| } |
| started.await(); |
| startLatch.countDown(); |
| |
| for (int i = 0; i < threads.length; i++) { |
| threads[i].join(); |
| } |
| DirectoryReader reader = DirectoryReader.open(writer); |
| IndexSearcher searcher = new IndexSearcher(reader); |
| for (String id : ids) { |
| TopDocs topDocs = searcher.search(new TermQuery(new Term("id", id)), 10); |
| if (updateSeveralDocs) { |
| assertEquals(2, topDocs.totalHits.value); |
| assertEquals(1, Math.abs(topDocs.scoreDocs[0].doc - topDocs.scoreDocs[1].doc)); |
| } else { |
| assertEquals(1, topDocs.totalHits.value); |
| } |
| } |
| if (mixDeletes == false) { |
| for (LeafReaderContext context : reader.leaves()) { |
| LeafReader leaf = context.reader(); |
| assertNull(((SegmentReader) leaf).getHardLiveDocs()); |
| } |
| } |
| mergeAwaySoftDeletes.set(true); |
| writer.addDocument(new Document()); // add a dummy doc to trigger a segment here |
| writer.flush(); |
| writer.forceMerge(1); |
| DirectoryReader oldReader = reader; |
| reader = DirectoryReader.openIfChanged(reader, writer); |
| if (reader != null) { |
| oldReader.close(); |
| assertNotSame(oldReader, reader); |
| } else { |
| reader = oldReader; |
| } |
| for (String id : ids) { |
| if (updateSeveralDocs) { |
| assertEquals(2, reader.docFreq(new Term("id", id))); |
| } else { |
| assertEquals(1, reader.docFreq(new Term("id", id))); |
| } |
| } |
| int numSoftDeleted = 0; |
| for (SegmentCommitInfo info : writer.cloneSegmentInfos()) { |
| numSoftDeleted += info.getSoftDelCount() + info.getDelCount(); |
| } |
| IndexWriter.DocStats docStats = writer.getDocStats(); |
| assertEquals(docStats.maxDoc - docStats.numDocs, numSoftDeleted); |
| writer.commit(); |
| try (DirectoryReader dirReader = DirectoryReader.open(dir)) { |
| int delCount = 0; |
| for (LeafReaderContext ctx : dirReader.leaves()) { |
| SegmentCommitInfo segmentInfo = ((SegmentReader) ctx.reader()).getSegmentInfo(); |
| delCount += segmentInfo.getSoftDelCount() + segmentInfo.getDelCount(); |
| } |
| assertEquals(numSoftDeleted, delCount); |
| } |
| IOUtils.close(reader, writer, dir); |
| } |
| |
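| // Stalls a flush while it writes its files, applies a concurrent delete/update of the |
| // flushing doc, and verifies the delete is not applied to the in-flight flush: the |
| // freshly opened reader still sees exactly one live doc. |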
| public void testDeleteHappensBeforeWhileFlush() throws IOException, InterruptedException { |
| CountDownLatch latch = new CountDownLatch(1); |
| CountDownLatch inFlush = new CountDownLatch(1); |
| try (Directory dir = new FilterDirectory(newDirectory()) { |
| @Override |
| public IndexOutput createOutput(String name, IOContext context) throws IOException { |
| if (callStackContains(DefaultIndexingChain.class, "flush")) { |
| try { |
| inFlush.countDown(); |
| latch.await(); |
| } catch (InterruptedException e) { |
| throw new AssertionError(e); |
| } |
| } |
| return super.createOutput(name, context); |
| } |
| }; IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) { |
| Document document = new Document(); |
| document.add(new StringField("id", "1", Field.Store.YES)); |
| writer.addDocument(document); |
| Thread t = new Thread(() -> { |
| try { |
| inFlush.await(); |
| writer.docWriter.flushControl.setApplyAllDeletes(); |
| if (random().nextBoolean()) { |
| writer.updateDocument(new Term("id", "1"), document); |
| } else { |
| writer.deleteDocuments(new Term("id", "1")); |
| } |
| |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } finally { |
| latch.countDown(); |
| } |
| }); |
| t.start(); |
| try (IndexReader reader = writer.getReader()) { |
| assertEquals(1, reader.numDocs()); |
| } |
| t.join(); |
| } |
| } |
| |
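| // Checks that the files referenced by the writer's SegmentInfos and the files actually |
| // present in the directory line up, ignoring segments_N, the write lock and ExtrasFS extras. |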
| private static void assertFiles(IndexWriter writer) throws IOException { |
| Predicate<String> filter = file -> file.startsWith("segments") == false && file.equals("write.lock") == false; |
| // ignore segments_N files and the write lock: we don't know which commits exist or which files they keep around |
| Set<String> segFiles = new HashSet<>(writer.cloneSegmentInfos().files(true)).stream() |
| .filter(filter).collect(Collectors.toSet()); |
| Set<String> dirFiles = Arrays.stream(writer.getDirectory().listAll()) |
| .filter(file -> !ExtrasFS.isExtra(file)) // ExtrasFS might add extra files; ignore them |
| .filter(filter).collect(Collectors.toSet()); |
| assertEquals(segFiles.size(), dirFiles.size()); |
| } |
| |
| public void testFullyDeletedSegmentsReleaseFiles() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig config = newIndexWriterConfig(); |
| config.setRAMBufferSizeMB(Integer.MAX_VALUE); |
| config.setMaxBufferedDocs(Integer.MAX_VALUE); // no auto flush |
| IndexWriter writer = new IndexWriter(dir, config); |
| Document d = new Document(); |
| d.add(new StringField("id", "doc-0", Field.Store.YES)); |
| writer.addDocument(d); |
| writer.flush(); |
| d = new Document(); |
| d.add(new StringField("id", "doc-1", Field.Store.YES)); |
| writer.addDocument(d); |
| writer.deleteDocuments(new Term("id", "doc-1")); |
| assertEquals(1, writer.cloneSegmentInfos().size()); |
| writer.flush(); |
| assertEquals(1, writer.cloneSegmentInfos().size()); |
| writer.commit(); |
| assertFiles(writer); |
| assertEquals(1, writer.cloneSegmentInfos().size()); |
| IOUtils.close(writer, dir); |
| } |
| |
| public void testSegmentInfoIsSnapshot() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig config = newIndexWriterConfig(); |
| config.setRAMBufferSizeMB(Integer.MAX_VALUE); |
| config.setMaxBufferedDocs(Integer.MAX_VALUE); // no auto flush |
| IndexWriter writer = new IndexWriter(dir, config); |
| Document d = new Document(); |
| d.add(new StringField("id", "doc-0", Field.Store.YES)); |
| writer.addDocument(d); |
| d = new Document(); |
| d.add(new StringField("id", "doc-1", Field.Store.YES)); |
| writer.addDocument(d); |
| DirectoryReader reader = writer.getReader(); |
| SegmentCommitInfo segmentInfo = ((SegmentReader) reader.leaves().get(0).reader()).getSegmentInfo(); |
| SegmentCommitInfo originalInfo = ((SegmentReader) reader.leaves().get(0).reader()).getOriginalSegmentInfo(); |
| assertEquals(0, originalInfo.getDelCount()); |
| assertEquals(0, segmentInfo.getDelCount()); |
| writer.deleteDocuments(new Term("id", "doc-0")); |
| writer.commit(); |
| assertEquals(0, segmentInfo.getDelCount()); |
| assertEquals(1, originalInfo.getDelCount()); |
| IOUtils.close(reader, writer, dir); |
| } |
| |
| public void testPreventChangingSoftDeletesField() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig().setSoftDeletesField("my_deletes")); |
| Document v1 = new Document(); |
| v1.add(new StringField("id", "1", Field.Store.YES)); |
| v1.add(new StringField("version", "1", Field.Store.YES)); |
| writer.addDocument(v1); |
| Document v2 = new Document(); |
| v2.add(new StringField("id", "1", Field.Store.YES)); |
| v2.add(new StringField("version", "2", Field.Store.YES)); |
| writer.softUpdateDocument(new Term("id", "1"), v2, new NumericDocValuesField("my_deletes", 1)); |
| writer.commit(); |
| writer.close(); |
| for (SegmentCommitInfo si : SegmentInfos.readLatestCommit(dir)) { |
| FieldInfos fieldInfos = IndexWriter.readFieldInfos(si); |
| assertEquals("my_deletes", fieldInfos.getSoftDeletesField()); |
| assertTrue(fieldInfos.fieldInfo("my_deletes").isSoftDeletesField()); |
| } |
| |
| IllegalArgumentException illegalError = expectThrows(IllegalArgumentException.class, () -> { |
| new IndexWriter(dir, newIndexWriterConfig().setSoftDeletesField("your_deletes")); |
| }); |
| assertEquals("cannot configure [your_deletes] as soft-deletes; " + |
| "this index uses [my_deletes] as soft-deletes already", illegalError.getMessage()); |
| |
| IndexWriterConfig softDeleteConfig = newIndexWriterConfig().setSoftDeletesField("my_deletes") |
| .setMergePolicy(new SoftDeletesRetentionMergePolicy("my_deletes", () -> new MatchAllDocsQuery(), newMergePolicy())); |
| writer = new IndexWriter(dir, softDeleteConfig); |
| Document tombstone = new Document(); |
| tombstone.add(new StringField("id", "tombstone", Field.Store.YES)); |
| tombstone.add(new NumericDocValuesField("my_deletes", 1)); |
| writer.addDocument(tombstone); |
| writer.flush(); |
| for (SegmentCommitInfo si : writer.cloneSegmentInfos()) { |
| FieldInfos fieldInfos = IndexWriter.readFieldInfos(si); |
| assertEquals("my_deletes", fieldInfos.getSoftDeletesField()); |
| assertTrue(fieldInfos.fieldInfo("my_deletes").isSoftDeletesField()); |
| } |
| writer.close(); |
| // reopening the writer without a soft-deletes field configured should be prevented |
| IllegalArgumentException reopenError = expectThrows(IllegalArgumentException.class, () -> { |
| new IndexWriter(dir, newIndexWriterConfig()); |
| }); |
| assertEquals("this index has [my_deletes] as soft-deletes already" + |
| " but soft-deletes field is not configured in IWC", reopenError.getMessage()); |
| dir.close(); |
| } |
| |
| public void testPreventAddingIndexesWithDifferentSoftDeletesField() throws Exception { |
| Directory dir1 = newDirectory(); |
| IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig().setSoftDeletesField("soft_deletes_1")); |
| for (int i = 0; i < 2; i++) { |
| Document d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| d.add(new StringField("version", Integer.toString(i), Field.Store.YES)); |
| w1.softUpdateDocument(new Term("id", "1"), d, new NumericDocValuesField("soft_deletes_1", 1)); |
| } |
| w1.commit(); |
| w1.close(); |
| |
| Directory dir2 = newDirectory(); |
| IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig().setSoftDeletesField("soft_deletes_2")); |
| IllegalArgumentException error = expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir1)); |
| assertEquals("cannot configure [soft_deletes_2] as soft-deletes; this index uses [soft_deletes_1] as soft-deletes already", |
| error.getMessage()); |
| w2.close(); |
| |
| Directory dir3 = newDirectory(); |
| IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField("soft_deletes_1"); |
| IndexWriter w3 = new IndexWriter(dir3, config); |
| w3.addIndexes(dir1); |
| for (SegmentCommitInfo si : w3.cloneSegmentInfos()) { |
| FieldInfo softDeleteField = IndexWriter.readFieldInfos(si).fieldInfo("soft_deletes_1"); |
| assertTrue(softDeleteField.isSoftDeletesField()); |
| } |
| w3.close(); |
| IOUtils.close(dir1, dir2, dir3); |
| } |
| |
| public void testNotAllowUsingExistingFieldAsSoftDeletes() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); |
| for (int i = 0; i < 2; i++) { |
| Document d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| if (random().nextBoolean()) { |
| d.add(new NumericDocValuesField("dv_field", 1)); |
| w.updateDocument(new Term("id", "1"), d); |
| } else { |
| w.softUpdateDocument(new Term("id", "1"), d, new NumericDocValuesField("dv_field", 1)); |
| } |
| } |
| w.commit(); |
| w.close(); |
| String softDeletesField = random().nextBoolean() ? "id" : "dv_field"; |
| IllegalArgumentException error = expectThrows(IllegalArgumentException.class, () -> { |
| IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField(softDeletesField); |
| new IndexWriter(dir, config); |
| }); |
| assertEquals("cannot configure [" + softDeletesField + "] as soft-deletes;" + |
| " this index uses [" + softDeletesField + "] as non-soft-deletes already", error.getMessage()); |
| IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField("non-existing-field"); |
| w = new IndexWriter(dir, config); |
| w.close(); |
| dir.close(); |
| } |
| |
| public void testBrokenPayload() throws Exception { |
| Directory d = newDirectory(); |
| IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| Token token = new Token("bar", 0, 3); |
| BytesRef evil = newBytesRef(new byte[1024]); |
| evil.offset = 1000; // offset + length is now out of bounds. |
| token.setPayload(evil); |
| doc.add(new TextField("foo", new CannedTokenStream(token))); |
| expectThrows(IndexOutOfBoundsException.class, () -> w.addDocument(doc)); |
| w.close(); |
| d.close(); |
| } |
| |
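| // Updates docs with even ids hard and odd ids soft, verifying along the way that hard |
| // live docs only reflect hard deletes (see assertHardLiveDocs below). |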
| public void testSoftAndHardLiveDocs() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); |
| String softDeletesField = "soft_delete"; |
| indexWriterConfig.setSoftDeletesField(softDeletesField); |
| IndexWriter writer = new IndexWriter(dir, indexWriterConfig); |
| Set<Integer> uniqueDocs = new HashSet<>(); |
| for (int i = 0; i < 100; i++) { |
| int docId = random().nextInt(5); |
| uniqueDocs.add(docId); |
| Document doc = new Document(); |
| doc.add(new StringField("id", String.valueOf(docId), Field.Store.YES)); |
| if (docId % 2 == 0) { |
| writer.updateDocument(new Term("id", String.valueOf(docId)), doc); |
| } else { |
| writer.softUpdateDocument(new Term("id", String.valueOf(docId)), doc, |
| new NumericDocValuesField(softDeletesField, 0)); |
| } |
| if (random().nextBoolean()) { |
| assertHardLiveDocs(writer, uniqueDocs); |
| } |
| } |
| |
| if (random().nextBoolean()) { |
| writer.commit(); |
| } |
| assertHardLiveDocs(writer, uniqueDocs); |
| |
| IOUtils.close(writer, dir); |
| } |
| |
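| // Aborts the merge after it completes but before it is committed, on an index whose |
| // merge policy keeps fully deleted segments around. |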
| public void testAbortFullyDeletedSegment() throws Exception { |
| AtomicBoolean abortMergeBeforeCommit = new AtomicBoolean(); |
| OneMergeWrappingMergePolicy mergePolicy = |
| new OneMergeWrappingMergePolicy( |
| newMergePolicy(), |
| toWrap -> |
| new MergePolicy.OneMerge(toWrap.segments) { |
| @Override |
| void onMergeComplete() throws IOException { |
| super.onMergeComplete(); |
| if (abortMergeBeforeCommit.get()) { |
| setAborted(); |
| } |
| } |
| }) { |
| @Override |
| public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) { |
| return true; |
| } |
| }; |
| |
| Directory dir = newDirectory(); |
| IndexWriterConfig indexWriterConfig = |
| newIndexWriterConfig().setMergePolicy(mergePolicy).setCommitOnClose(false); |
| IndexWriter writer = new IndexWriter(dir, indexWriterConfig); |
| writer.addDocument(Collections.singletonList(new StringField("id", "1", Field.Store.YES))); |
| writer.flush(); |
| |
| writer.deleteDocuments(new Term("id", "1")); |
| abortMergeBeforeCommit.set(true); |
| writer.flush(); |
| writer.forceMerge(1); |
| IOUtils.close(writer, dir); |
| } |
| |
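| // For every id: a live doc must also be hard-live; a dead doc with an even id was hard |
| // deleted (not hard-live), while a dead doc with an odd id was only soft deleted. |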
| private void assertHardLiveDocs(IndexWriter writer, Set<Integer> uniqueDocs) throws IOException { |
| try (DirectoryReader reader = DirectoryReader.open(writer)) { |
| assertEquals(uniqueDocs.size(), reader.numDocs()); |
| List<LeafReaderContext> leaves = reader.leaves(); |
| for (LeafReaderContext ctx : leaves) { |
| LeafReader leaf = ctx.reader(); |
| assertTrue(leaf instanceof SegmentReader); |
| SegmentReader sr = (SegmentReader) leaf; |
| if (sr.getHardLiveDocs() != null) { |
| Terms id = sr.terms("id"); |
| TermsEnum iterator = id.iterator(); |
| Bits hardLiveDocs = sr.getHardLiveDocs(); |
| Bits liveDocs = sr.getLiveDocs(); |
| for (Integer dId : uniqueDocs) { |
| boolean mustBeHardDeleted = dId % 2 == 0; |
| if (iterator.seekExact(newBytesRef(dId.toString()))) { |
| PostingsEnum postings = iterator.postings(null); |
| while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| if (liveDocs.get(postings.docID())) { |
| assertTrue(hardLiveDocs.get(postings.docID())); |
| } else if (mustBeHardDeleted) { |
| assertFalse(hardLiveDocs.get(postings.docID())); |
| } else { |
| assertTrue(hardLiveDocs.get(postings.docID())); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
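| // OpenMode.CREATE may change the index-created version; APPEND and CREATE_OR_APPEND |
| // must keep the major version the index was originally created with. |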
| public void testSetIndexCreatedVersion() throws IOException { |
| IllegalArgumentException e = expectThrows(IllegalArgumentException.class, |
| () -> new IndexWriterConfig().setIndexCreatedVersionMajor(Version.LATEST.major+1)); |
| assertEquals("indexCreatedVersionMajor may not be in the future: current major version is " + |
| Version.LATEST.major + ", but got: " + (Version.LATEST.major+1), e.getMessage()); |
| e = expectThrows(IllegalArgumentException.class, |
| () -> new IndexWriterConfig().setIndexCreatedVersionMajor(Version.LATEST.major-2)); |
| assertEquals("indexCreatedVersionMajor may not be less than the minimum supported version: " + |
| (Version.LATEST.major-1) + ", but got: " + (Version.LATEST.major-2), e.getMessage()); |
| |
| for (int previousMajor = Version.LATEST.major - 1; previousMajor <= Version.LATEST.major; previousMajor++) { |
| for (int newMajor = Version.LATEST.major - 1; newMajor <= Version.LATEST.major; newMajor++) { |
| for (OpenMode openMode : OpenMode.values()) { |
| try (Directory dir = newDirectory()) { |
| try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setIndexCreatedVersionMajor(previousMajor))) { |
| assert w != null; |
| } |
| SegmentInfos infos = SegmentInfos.readLatestCommit(dir); |
| assertEquals(previousMajor, infos.getIndexCreatedVersionMajor()); |
| try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setOpenMode(openMode).setIndexCreatedVersionMajor(newMajor))) { |
| assert w != null; |
| } |
| infos = SegmentInfos.readLatestCommit(dir); |
| if (openMode == OpenMode.CREATE) { |
| assertEquals(newMajor, infos.getIndexCreatedVersionMajor()); |
| } else { |
| assertEquals(previousMajor, infos.getIndexCreatedVersionMajor()); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // see LUCENE-8639 |
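| // a full flush must cope with DWPTs that are created and locked concurrently |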
| public void testFlushWhileStartingNewThreads() throws IOException, InterruptedException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); |
| w.addDocument(new Document()); |
| assertEquals(1, w.docWriter.perThreadPool.size()); |
| CountDownLatch latch = new CountDownLatch(1); |
| Thread thread = new Thread(() -> { |
| latch.countDown(); |
| List<Closeable> states = new ArrayList<>(); |
| try { |
| for (int i = 0; i < 100; i++) { |
| DocumentsWriterPerThread state = w.docWriter.perThreadPool.getAndLock(); |
| states.add(state::unlock); |
| state.deleteQueue.getNextSequenceNumber(); |
| } |
| } finally { |
| IOUtils.closeWhileHandlingException(states); |
| } |
| }); |
| thread.start(); |
| latch.await(); |
| w.docWriter.flushControl.markForFullFlush(); |
| thread.join(); |
| w.docWriter.flushControl.abortFullFlushes(); |
| w.close(); |
| dir.close(); |
| } |
| |
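| // Rolling back while an indexer and a refresher run concurrently must not record the |
| // resulting AlreadyClosedException as a tragic event on the writer. |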
| public void testRefreshAndRollbackConcurrently() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); |
| AtomicBoolean stopped = new AtomicBoolean(); |
| Semaphore indexedDocs = new Semaphore(0); |
| Thread indexer = new Thread(() -> { |
| while (stopped.get() == false) { |
| try { |
| String id = Integer.toString(random().nextInt(100)); |
| Document doc = new Document(); |
| doc.add(new StringField("id", id, Field.Store.YES)); |
| w.updateDocument(new Term("id", id), doc); |
| indexedDocs.release(1); |
| } catch (IOException e) { |
| throw new AssertionError(e); |
| } catch (AlreadyClosedException ignored) { |
| return; |
| } |
| } |
| }); |
| |
| SearcherManager sm = new SearcherManager(w, new SearcherFactory()); |
| Thread refresher = new Thread(() -> { |
| while (stopped.get() == false) { |
| try { |
| sm.maybeRefreshBlocking(); |
| } catch (IOException e) { |
| throw new AssertionError(e); |
| } catch (AlreadyClosedException ignored) { |
| return; |
| } |
| } |
| }); |
| |
| try { |
| indexer.start(); |
| refresher.start(); |
| indexedDocs.acquire(1 + random().nextInt(100)); |
| w.rollback(); |
| } finally { |
| stopped.set(true); |
| indexer.join(); |
| refresher.join(); |
| Throwable e = w.getTragicException(); |
| IOSupplier<String> supplier = () -> { |
| if (e != null) { |
| StringWriter writer = new StringWriter(); |
| try (PrintWriter printWriter = new PrintWriter(writer)) { |
| e.printStackTrace(printWriter); |
| } |
| return writer.toString(); |
| } else { |
| return ""; |
| } |
| }; |
| assertNull("should not consider ACE a tragedy on a closed IW: " + supplier.get(), w.getTragicException()); |
| IOUtils.close(sm, dir); |
| } |
| } |
| |
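| // The event queue runs queued events on processEvents, drains pending events on close, |
| // and rejects processEvents/add once it is closed. |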
| public void testCloseableQueue() throws IOException, InterruptedException { |
| try(Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) { |
| IndexWriter.EventQueue queue = new IndexWriter.EventQueue(writer); |
| AtomicInteger executed = new AtomicInteger(0); |
| |
| queue.add(w -> { |
| assertNotNull(w); |
| executed.incrementAndGet(); |
| }); |
| queue.add(w -> { |
| assertNotNull(w); |
| executed.incrementAndGet(); |
| }); |
| queue.processEvents(); |
| assertEquals(2, executed.get()); |
| queue.processEvents(); |
| assertEquals(2, executed.get()); |
| |
| queue.add(w -> { |
| assertNotNull(w); |
| executed.incrementAndGet(); |
| }); |
| queue.add(w -> { |
| assertNotNull(w); |
| executed.incrementAndGet(); |
| }); |
| |
| Thread t = new Thread(() -> { |
| try { |
| queue.processEvents(); |
| } catch (IOException e) { |
| throw new AssertionError(); |
| } catch (AlreadyClosedException ex) { |
| // possible |
| } |
| }); |
| t.start(); |
| queue.close(); |
| t.join(); |
| assertEquals(4, executed.get()); |
| expectThrows(AlreadyClosedException.class, () -> queue.processEvents()); |
| expectThrows(AlreadyClosedException.class, () -> queue.add(w -> {})); |
| } |
| } |
| |
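| // Random concurrent adds, updates, deletes, refreshes, commits and force-merges; the |
| // test passes if nothing trips inside IndexWriter. |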
| public void testRandomOperations() throws Exception { |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| iwc.setMergePolicy(new FilterMergePolicy(newMergePolicy()) { |
| boolean keepFullyDeletedSegment = random().nextBoolean(); |
| |
| @Override |
| public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) { |
| return keepFullyDeletedSegment; |
| } |
| }); |
| try (Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, iwc); |
| SearcherManager sm = new SearcherManager(writer, new SearcherFactory())) { |
| Semaphore numOperations = new Semaphore(10 + random().nextInt(1000)); |
| boolean singleDoc = random().nextBoolean(); |
| Thread[] threads = new Thread[1 + random().nextInt(4)]; |
| CountDownLatch latch = new CountDownLatch(threads.length); |
| for (int i = 0; i < threads.length; i++) { |
| threads[i] = new Thread(() -> { |
| latch.countDown(); |
| try { |
| latch.await(); |
| while (numOperations.tryAcquire()) { |
| String id = singleDoc ? "1" : Integer.toString(random().nextInt(10)); |
| Document doc = new Document(); |
| doc.add(new StringField("id", id, Field.Store.YES)); |
| if (random().nextInt(10) <= 2) { |
| writer.updateDocument(new Term("id", id), doc); |
| } else if (random().nextInt(10) <= 2) { |
| writer.deleteDocuments(new Term("id", id)); |
| } else { |
| writer.addDocument(doc); |
| } |
| if (random().nextInt(100) < 10) { |
| sm.maybeRefreshBlocking(); |
| } |
| if (random().nextInt(100) < 5) { |
| writer.commit(); |
| } |
| if (random().nextInt(100) < 1) { |
| writer.forceMerge(1 + random().nextInt(10), random().nextBoolean()); |
| } |
| } |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } |
| }); |
| threads[i].start(); |
| } |
| for (Thread thread : threads) { |
| thread.join(); |
| } |
| } |
| } |
| |
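| // Same random stress with soft updates plus a retention policy that keeps soft-deleted |
| // docs whose seq_no is at or above the moving retainingSeqNo. |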
| public void testRandomOperationsWithSoftDeletes() throws Exception { |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| AtomicInteger seqNo = new AtomicInteger(-1); |
| AtomicInteger retainingSeqNo = new AtomicInteger(); |
| iwc.setSoftDeletesField("soft_deletes"); |
| iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_deletes", |
| () -> LongPoint.newRangeQuery("seq_no", retainingSeqNo.longValue(), Long.MAX_VALUE), newMergePolicy())); |
| try (Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, iwc); |
| SearcherManager sm = new SearcherManager(writer, new SearcherFactory())) { |
| Semaphore numOperations = new Semaphore(10 + random().nextInt(1000)); |
| boolean singleDoc = random().nextBoolean(); |
| Thread[] threads = new Thread[1 + random().nextInt(4)]; |
| CountDownLatch latch = new CountDownLatch(threads.length); |
| for (int i = 0; i < threads.length; i++) { |
| threads[i] = new Thread(() -> { |
| latch.countDown(); |
| try { |
| latch.await(); |
| while (numOperations.tryAcquire()) { |
| String id = singleDoc ? "1" : Integer.toString(random().nextInt(10)); |
| Document doc = new Document(); |
| doc.add(new StringField("id", id, Field.Store.YES)); |
| doc.add(new LongPoint("seq_no", seqNo.getAndIncrement())); |
| if (random().nextInt(10) <= 2) { |
| if (random().nextBoolean()) { |
| doc.add(new NumericDocValuesField(iwc.softDeletesField, 1)); |
| } |
| writer.softUpdateDocument(new Term("id", id), doc, new NumericDocValuesField(iwc.softDeletesField, 1)); |
| } else { |
| writer.addDocument(doc); |
| } |
| if (random().nextInt(100) < 10) { |
| int min = retainingSeqNo.get(); |
| int max = seqNo.get(); |
| if (min < max && random().nextBoolean()) { |
| retainingSeqNo.compareAndSet(min, min - random().nextInt(max - min)); |
| } |
| } |
| if (random().nextInt(100) < 10) { |
| sm.maybeRefreshBlocking(); |
| } |
| if (random().nextInt(100) < 5) { |
| writer.commit(); |
| } |
| if (random().nextInt(100) < 1) { |
| writer.forceMerge(1 + random().nextInt(10), random().nextBoolean()); |
| } |
| } |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } |
| }); |
| threads[i].start(); |
| } |
| for (Thread thread : threads) { |
| thread.join(); |
| } |
| } |
| } |
| |
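| // Sequence numbers advance deterministically across updates, commits and getReader; a |
| // doc must be searchable once a refresh has covered its sequence number. |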
| public void testMaxCompletedSequenceNumber() throws IOException, InterruptedException { |
| try (Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) { |
| assertEquals(1, writer.addDocument(new Document())); |
| assertEquals(2, writer.updateDocument(new Term("foo", "bar"), new Document())); |
| writer.flushNextBuffer(); |
| assertEquals(3, writer.commit()); |
| assertEquals(4, writer.addDocument(new Document())); |
| assertEquals(4, writer.getMaxCompletedSequenceNumber()); |
| // commit moves seqNo by 2 since there is one DWPT that could still be in-flight |
| assertEquals(6, writer.commit()); |
| assertEquals(6, writer.getMaxCompletedSequenceNumber()); |
| assertEquals(7, writer.addDocument(new Document())); |
| writer.getReader().close(); |
| // getReader moves seqNo by 2 since there is one DWPT that could still be in-flight |
| assertEquals(9, writer.getMaxCompletedSequenceNumber()); |
| } |
| try (Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); |
| SearcherManager manager = new SearcherManager(writer, new SearcherFactory())) { |
| CountDownLatch start = new CountDownLatch(1); |
| int numDocs = 100 + random().nextInt(500); |
| AtomicLong maxCompletedSeqID = new AtomicLong(-1); |
| Thread[] threads = new Thread[2 + random().nextInt(2)]; |
| for (int i = 0; i < threads.length; i++) { |
| int idx = i; |
| threads[i] = new Thread(() -> { |
| try { |
| start.await(); |
| for (int j = 0; j < numDocs; j++) { |
| Document doc = new Document(); |
| String id = idx + "-" + j; |
| doc.add(new StringField("id", id, Field.Store.NO)); |
| long seqNo = writer.addDocument(doc); |
| if (maxCompletedSeqID.get() < seqNo) { |
| long maxCompletedSequenceNumber = writer.getMaxCompletedSequenceNumber(); |
| manager.maybeRefreshBlocking(); |
| maxCompletedSeqID.updateAndGet(oldVal -> Math.max(oldVal, maxCompletedSequenceNumber)); |
| } |
| IndexSearcher acquire = manager.acquire(); |
| try { |
| assertEquals(1, acquire.search(new TermQuery(new Term("id", id)), 10).totalHits.value); |
| } finally { |
| manager.release(acquire); |
| } |
| } |
| } catch (Exception e) { |
| throw new AssertionError(e); |
| } |
| }); |
| threads[i].start(); |
| } |
| start.countDown(); |
| for (int i = 0; i < threads.length; i++) { |
| threads[i].join(); |
| } |
| } |
| } |
| |
| public void testEnsureMaxSeqNoIsAccurateDuringFlush() throws IOException, InterruptedException { |
| AtomicReference<CountDownLatch> waitRef = new AtomicReference<>(new CountDownLatch(0)); |
| AtomicReference<CountDownLatch> arrivedRef = new AtomicReference<>(new CountDownLatch(0)); |
| InfoStream stream = new InfoStream() { |
| @Override |
| public void message(String component, String message) { |
| if ("TP".equals(component) && "DocumentsWriterPerThread addDocuments start".equals(message)) { |
| try { |
| arrivedRef.get().countDown(); |
| waitRef.get().await(); |
| } catch (InterruptedException e) { |
| throw new AssertionError(e); |
| } |
| } |
| } |
| |
| @Override |
| public boolean isEnabled(String component) { |
| return "TP".equals(component); |
| } |
| |
| @Override |
| public void close() throws IOException { |
| } |
| }; |
| IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); |
| indexWriterConfig.setInfoStream(stream); |
| try (Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, indexWriterConfig) { |
| @Override |
| protected boolean isEnableTestPoints() { |
| return true; |
| } |
| }) { |
| // we produce one DWPT with 1 doc |
| writer.addDocument(new Document()); |
| assertEquals(1, writer.docWriter.perThreadPool.size()); |
| long maxCompletedSequenceNumber = writer.getMaxCompletedSequenceNumber(); |
| // save the seqNo and use the latches to block this DWPT so that a refresh must wait for it |
| waitRef.set(new CountDownLatch(1)); |
| arrivedRef.set(new CountDownLatch(1)); |
| Thread waiterThread = new Thread(() -> { |
| try { |
| writer.addDocument(new Document()); |
| } catch (IOException e) { |
| throw new AssertionError(e); |
| } |
| }); |
| waiterThread.start(); |
| arrivedRef.get().await(); |
| Thread refreshThread = new Thread(() -> { |
| try { |
| writer.getReader().close(); |
| } catch (IOException e) { |
| throw new AssertionError(e); |
| } |
| }); |
| DocumentsWriterDeleteQueue deleteQueue = writer.docWriter.deleteQueue; |
| refreshThread.start(); |
| // now we wait until the refresh has swapped the delete queue and assert that |
| // we see an accurate seqNo |
| while (writer.docWriter.deleteQueue == deleteQueue) { |
| Thread.yield(); // busy wait for refresh to swap the queue |
| } |
| try { |
| assertEquals(maxCompletedSequenceNumber, writer.getMaxCompletedSequenceNumber()); |
| } finally { |
| waitRef.get().countDown(); |
| waiterThread.join(); |
| refreshThread.join(); |
| } |
| assertEquals(maxCompletedSequenceNumber+2, writer.getMaxCompletedSequenceNumber()); |
| } |
| } |
| |
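| // SegmentCommitInfo.getId() must change whenever a commit alters the segment (deletes, |
| // doc-values updates) while the underlying SegmentInfo id stays stable; addIndexes |
| // preserves both ids, and freshly written segments get unique ids. |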
| public void testSegmentCommitInfoId() throws IOException { |
| try (Directory dir = newDirectory()) { |
| SegmentInfos segmentCommitInfos; |
| try (IndexWriter writer = new IndexWriter(dir, |
| new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { |
| Document doc = new Document(); |
| doc.add(new NumericDocValuesField("num", 1)); |
| doc.add(new StringField("id", "1", Field.Store.NO)); |
| writer.addDocument(doc); |
| doc = new Document(); |
| doc.add(new NumericDocValuesField("num", 1)); |
| doc.add(new StringField("id", "2", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.commit(); |
| segmentCommitInfos = SegmentInfos.readLatestCommit(dir); |
| byte[] id = segmentCommitInfos.info(0).getId(); |
| byte[] segInfoId = segmentCommitInfos.info(0).info.getId(); |
| |
| writer.updateNumericDocValue(new Term("id", "1"), "num", 2); |
| writer.commit(); |
| segmentCommitInfos = SegmentInfos.readLatestCommit(dir); |
| assertEquals(1, segmentCommitInfos.size()); |
| assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); |
| assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); |
| id = segmentCommitInfos.info(0).getId(); |
| writer.addDocument(new Document()); // second segment |
| writer.commit(); |
| segmentCommitInfos = SegmentInfos.readLatestCommit(dir); |
| assertEquals(2, segmentCommitInfos.size()); |
| assertEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); |
| assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); |
| |
| doc = new Document(); |
| doc.add(new NumericDocValuesField("num", 5)); |
| doc.add(new StringField("id", "1", Field.Store.NO)); |
| writer.updateDocument(new Term("id", "1"), doc); |
| writer.commit(); |
| segmentCommitInfos = SegmentInfos.readLatestCommit(dir); |
| assertEquals(3, segmentCommitInfos.size()); |
| assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); |
| assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); |
| } |
| |
| try (Directory dir2 = newDirectory(); |
| IndexWriter writer2 = new IndexWriter(dir2, |
| new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { |
| writer2.addIndexes(dir); |
| writer2.commit(); |
| SegmentInfos infos2 = SegmentInfos.readLatestCommit(dir2); |
| assertEquals(infos2.size(), segmentCommitInfos.size()); |
| for (int i = 0; i < infos2.size(); i++) { |
| assertEquals(StringHelper.idToString(infos2.info(i).getId()), StringHelper.idToString(segmentCommitInfos.info(i).getId())); |
| assertEquals(StringHelper.idToString(infos2.info(i).info.getId()), StringHelper.idToString(segmentCommitInfos.info(i).info.getId())); |
| } |
| } |
| } |
| |
| Set<String> ids = new HashSet<>(); |
| for (int i = 0; i < 2; i++) { |
| try (Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, |
| new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { |
| Document doc = new Document(); |
| doc.add(new NumericDocValuesField("num", 1)); |
| doc.add(new StringField("id", "1", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.commit(); |
| SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(dir); |
| String id = StringHelper.idToString(segmentCommitInfos.info(0).getId()); |
| assertTrue(ids.add(id)); |
| writer.updateNumericDocValue(new Term("id", "1"), "num", 2); |
| writer.commit(); |
| segmentCommitInfos = SegmentInfos.readLatestCommit(dir); |
| id = StringHelper.idToString(segmentCommitInfos.info(0).getId()); |
| assertTrue(ids.add(id)); |
| } |
| } |
| } |
| |
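| // mergeFinished must be called exactly once (the SetOnce would throw on a second call) |
| // even when the merge only sees fully deleted, zero-doc segments. |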
| public void testMergeZeroDocsMergeIsClosedOnce() throws IOException { |
| LogDocMergePolicy keepAllSegments = new LogDocMergePolicy() { |
| @Override |
| public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) { |
| return true; |
| } |
| }; |
| try (Directory dir = newDirectory()) { |
| try (IndexWriter writer = new IndexWriter(dir, |
| new IndexWriterConfig().setMergePolicy(new OneMergeWrappingMergePolicy(keepAllSegments, merge -> { |
| SetOnce<Boolean> onlyFinishOnce = new SetOnce<>(); |
| return new MergePolicy.OneMerge(merge.segments) { |
| @Override |
| public void mergeFinished(boolean success, boolean segmentDropped) throws IOException { |
| super.mergeFinished(success, segmentDropped); |
| onlyFinishOnce.set(true); |
| } |
| }; |
| })))) { |
| Document doc = new Document(); |
| doc.add(new StringField("id", "1", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.flush(); |
| writer.addDocument(doc); |
| writer.flush(); |
| writer.deleteDocuments(new Term("id", "1")); |
| writer.flush(); |
| assertEquals(2, writer.getSegmentCount()); |
| assertEquals(0, writer.getDocStats().numDocs); |
| assertEquals(2, writer.getDocStats().maxDoc); |
| writer.forceMerge(1); |
| } |
| } |
| } |
| |
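| // Merging fully deleted segments on commit, while the policy also keeps them, must not |
| // resurrect deleted docs: the reader still sees exactly one live doc. |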
| public void testMergeOnCommitKeepFullyDeletedSegments() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| iwc.setMaxFullFlushMergeWaitMillis(30 * 1000); |
| iwc.mergePolicy = new FilterMergePolicy(newMergePolicy()) { |
| @Override |
| public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) { |
| return true; |
| } |
| |
| @Override |
| public MergeSpecification findFullFlushMerges(MergeTrigger mergeTrigger, |
| SegmentInfos segmentInfos, |
| MergeContext mergeContext) { |
| List<SegmentCommitInfo> fullyDeletedSegments = segmentInfos.asList().stream() |
| .filter(s -> s.info.maxDoc() - s.getDelCount() == 0) |
| .collect(Collectors.toList()); |
| if (fullyDeletedSegments.isEmpty()) { |
| return null; |
| } |
| MergeSpecification spec = new MergeSpecification(); |
| spec.add(new OneMerge(fullyDeletedSegments)); |
| return spec; |
| } |
| }; |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document d = new Document(); |
| d.add(new StringField("id", "1", Field.Store.YES)); |
| w.addDocument(d); |
| w.commit(); |
| w.updateDocument(new Term("id", "1"), d); |
| w.commit(); |
| try (DirectoryReader reader = w.getReader()) { |
| assertEquals(1, reader.numDocs()); |
| } |
| IOUtils.close(w, dir); |
| } |
| |
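| // getPendingNumDocs must track the doc count across flushes and writer reopens. |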
| public void testPendingNumDocs() throws Exception { |
| try (Directory dir = newDirectory()) { |
| int numDocs = random().nextInt(100); |
| try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) { |
| for (int i = 0; i < numDocs; i++) { |
| Document d = new Document(); |
| d.add(new StringField("id", Integer.toString(i), Field.Store.YES)); |
| writer.addDocument(d); |
| assertEquals(i + 1L, writer.getPendingNumDocs()); |
| } |
| assertEquals(numDocs, writer.getPendingNumDocs()); |
| writer.flush(); |
| assertEquals(numDocs, writer.getPendingNumDocs()); |
| } |
| try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) { |
| assertEquals(numDocs, writer.getPendingNumDocs()); |
| } |
| } |
| } |
| |
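| // Indexing threads must block while the stall control is stalled and complete only |
| // once the stall is released. |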
| public void testIndexWriterBlocksOnStall() throws IOException, InterruptedException { |
| try (Directory dir = newDirectory()) { |
| try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) { |
| DocumentsWriterStallControl stallControl = writer.getDocsWriter().flushControl.stallControl; |
| stallControl.updateStalled(true); |
| Thread[] threads = new Thread[random().nextInt(3)+1]; |
| AtomicLong numThreadsCompleted = new AtomicLong(0); |
| for (int i = 0; i < threads.length; i++) { |
| threads[i] = new Thread(() -> { |
| Document d = new Document(); |
| d.add(new StringField("id", Integer.toString(0), Field.Store.YES)); |
| try { |
| writer.addDocument(d); |
| } catch (IOException e) { |
| throw new AssertionError(e); |
| } |
| numThreadsCompleted.incrementAndGet(); |
| }); |
| threads[i].start(); |
| } |
| try { |
| for (int i = 0; i < 10; i++) { |
| synchronized (stallControl) { |
| stallControl.notifyAll(); |
| } |
| while (stallControl.getNumWaiting() != threads.length) { |
| // wait for all threads to be stalled again |
| assertEquals(0, writer.getPendingNumDocs()); |
| assertEquals(0, numThreadsCompleted.get()); |
| } |
| } |
| } finally { |
| stallControl.updateStalled(false); |
| for (Thread t : threads) { |
| t.join(); |
| } |
| } |
| writer.commit(); |
| assertEquals(threads.length, writer.getDocStats().maxDoc); |
| } |
| } |
| } |
| } |