| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.nio.file.NoSuchFileException; |
| import java.nio.file.Path; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Random; |
| import java.util.Set; |
| import java.util.TreeSet; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.StoredField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.store.BaseDirectoryWrapper; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.TestUtil; |
| import org.junit.Assume; |
| |
| import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| |
| @LuceneTestCase.SuppressCodecs("SimpleText") |
| public class TestDirectoryReader extends LuceneTestCase { |
| |
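  /** Verifies that stored fields and term vectors of two indexed documents round-trip through a DirectoryReader. */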
| public void testDocument() throws IOException { |
| Directory dir = newDirectory(); |
| Document doc1 = new Document(); |
| Document doc2 = new Document(); |
| DocHelper.setupDoc(doc1); |
| DocHelper.setupDoc(doc2); |
| DocHelper.writeDoc(random(), dir, doc1); |
| DocHelper.writeDoc(random(), dir, doc2); |
| DirectoryReader reader = DirectoryReader.open(dir); |
    assertNotNull(reader);
| assertTrue(reader instanceof StandardDirectoryReader); |
| |
| Document newDoc1 = reader.document(0); |
    assertNotNull(newDoc1);
    assertEquals(DocHelper.numFields(doc1) - DocHelper.unstored.size(), DocHelper.numFields(newDoc1));
| Document newDoc2 = reader.document(1); |
    assertNotNull(newDoc2);
    assertEquals(DocHelper.numFields(doc2) - DocHelper.unstored.size(), DocHelper.numFields(newDoc2));
| Terms vector = reader.getTermVectors(0).terms(DocHelper.TEXT_FIELD_2_KEY); |
| assertNotNull(vector); |
| |
| reader.close(); |
| dir.close(); |
| } |
| |
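  /** Mixes TermsEnums and reused PostingsEnums from two different MultiReaders to ensure reuse across readers is safe. */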
| public void testMultiTermDocs() throws IOException { |
    Directory ramDir1 = newDirectory();
    addDoc(random(), ramDir1, "test foo", true);
    Directory ramDir2 = newDirectory();
    addDoc(random(), ramDir2, "test blah", true);
    Directory ramDir3 = newDirectory();
    addDoc(random(), ramDir3, "test wow", true);
| |
| IndexReader[] readers1 = new IndexReader[]{DirectoryReader.open(ramDir1), DirectoryReader.open(ramDir3)}; |
| IndexReader[] readers2 = new IndexReader[]{DirectoryReader.open(ramDir1), DirectoryReader.open(ramDir2), DirectoryReader.open(ramDir3)}; |
| MultiReader mr2 = new MultiReader(readers1); |
| MultiReader mr3 = new MultiReader(readers2); |
| |
| // test mixing up TermDocs and TermEnums from different readers. |
| TermsEnum te2 = MultiTerms.getTerms(mr2, "body").iterator(); |
| te2.seekCeil(new BytesRef("wow")); |
| PostingsEnum td = TestUtil.docs(random(), mr2, |
| "body", |
| te2.term(), |
| null, |
| 0); |
| |
| TermsEnum te3 = MultiTerms.getTerms(mr3, "body").iterator(); |
| te3.seekCeil(new BytesRef("wow")); |
| td = TestUtil.docs(random(), te3, |
| td, |
| 0); |
| |
| int ret = 0; |
| |
| // This should blow up if we forget to check that the TermEnum is from the same |
| // reader as the TermDocs. |
| while (td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) ret += td.docID(); |
| |
    // Really a dummy assert: ensures that we got some docs and that the loop
    // above is not eliminated by hotspot
| assertTrue(ret > 0); |
| readers1[0].close(); |
| readers1[1].close(); |
| readers2[0].close(); |
| readers2[1].close(); |
| readers2[2].close(); |
| ramDir1.close(); |
| ramDir2.close(); |
| ramDir3.close(); |
| } |
| |
  private void addDoc(Random random, Directory dir, String s, boolean create) throws IOException {
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
        .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND));
| Document doc = new Document(); |
| doc.add(newTextField("body", s, Field.Store.NO)); |
| iw.addDocument(doc); |
| iw.close(); |
| } |
| |
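  /** Checks that isCurrent() flips to false once the index is appended to or re-created. */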
| public void testIsCurrent() throws Exception { |
| Directory d = newDirectory(); |
| IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); |
| addDocumentWithFields(writer); |
| writer.close(); |
| // set up reader: |
| DirectoryReader reader = DirectoryReader.open(d); |
| assertTrue(reader.isCurrent()); |
| // modify index by adding another document: |
| writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND)); |
| addDocumentWithFields(writer); |
| writer.close(); |
| assertFalse(reader.isCurrent()); |
| // re-create index: |
| writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.CREATE)); |
| addDocumentWithFields(writer); |
| writer.close(); |
| assertFalse(reader.isCurrent()); |
| reader.close(); |
| d.close(); |
| } |
| |
| /** |
| * Tests the IndexReader.getFieldNames implementation |
| * @throws Exception on error |
| */ |
| public void testGetFieldNames() throws Exception { |
| Directory d = newDirectory(); |
| // set up writer |
| IndexWriter writer = new IndexWriter( |
| d, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| ); |
| |
| Document doc = new Document(); |
| |
| FieldType customType3 = new FieldType(); |
| customType3.setStored(true); |
| |
| doc.add(new StringField("keyword", "test1", Field.Store.YES)); |
| doc.add(new TextField("text", "test1", Field.Store.YES)); |
| doc.add(new Field("unindexed", "test1", customType3)); |
| doc.add(new TextField("unstored","test1", Field.Store.NO)); |
| writer.addDocument(doc); |
| |
| writer.close(); |
| // set up reader |
| DirectoryReader reader = DirectoryReader.open(d); |
| FieldInfos fieldInfos = FieldInfos.getMergedFieldInfos(reader); |
| assertNotNull(fieldInfos.fieldInfo("keyword")); |
| assertNotNull(fieldInfos.fieldInfo("text")); |
| assertNotNull(fieldInfos.fieldInfo("unindexed")); |
| assertNotNull(fieldInfos.fieldInfo("unstored")); |
| reader.close(); |
| // add more documents |
| writer = new IndexWriter( |
| d, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND) |
| .setMergePolicy(newLogMergePolicy()) |
| ); |
| // want to get some more segments here |
| int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); |
    for (int i = 0; i < 5 * mergeFactor; i++) {
      doc = new Document();
      doc.add(new StringField("keyword", "test1", Field.Store.YES));
      doc.add(new TextField("text", "test1", Field.Store.YES));
      doc.add(new Field("unindexed", "test1", customType3));
      doc.add(new TextField("unstored", "test1", Field.Store.NO));
      writer.addDocument(doc);
    }
| // new fields are in some different segments (we hope) |
    for (int i = 0; i < 5 * mergeFactor; i++) {
      doc = new Document();
      doc.add(new StringField("keyword2", "test1", Field.Store.YES));
      doc.add(new TextField("text2", "test1", Field.Store.YES));
      doc.add(new Field("unindexed2", "test1", customType3));
      doc.add(new TextField("unstored2", "test1", Field.Store.NO));
      writer.addDocument(doc);
    }
| // new termvector fields |
| |
| FieldType customType5 = new FieldType(TextField.TYPE_STORED); |
| customType5.setStoreTermVectors(true); |
| FieldType customType6 = new FieldType(TextField.TYPE_STORED); |
| customType6.setStoreTermVectors(true); |
| customType6.setStoreTermVectorOffsets(true); |
| FieldType customType7 = new FieldType(TextField.TYPE_STORED); |
| customType7.setStoreTermVectors(true); |
| customType7.setStoreTermVectorPositions(true); |
| FieldType customType8 = new FieldType(TextField.TYPE_STORED); |
| customType8.setStoreTermVectors(true); |
| customType8.setStoreTermVectorOffsets(true); |
| customType8.setStoreTermVectorPositions(true); |
| |
    for (int i = 0; i < 5 * mergeFactor; i++) {
| doc = new Document(); |
| doc.add(new TextField("tvnot", "tvnot", Field.Store.YES)); |
| doc.add(new Field("termvector", "termvector", customType5)); |
| doc.add(new Field("tvoffset", "tvoffset", customType6)); |
| doc.add(new Field("tvposition", "tvposition", customType7)); |
| doc.add(new Field("tvpositionoffset", "tvpositionoffset", customType8)); |
| writer.addDocument(doc); |
| } |
| |
| writer.close(); |
| |
| // verify fields again |
| reader = DirectoryReader.open(d); |
| fieldInfos = FieldInfos.getMergedFieldInfos(reader); |
| |
| Collection<String> allFieldNames = new HashSet<>(); |
| Collection<String> indexedFieldNames = new HashSet<>(); |
| Collection<String> notIndexedFieldNames = new HashSet<>(); |
| Collection<String> tvFieldNames = new HashSet<>(); |
| |
    for (FieldInfo fieldInfo : fieldInfos) {
| final String name = fieldInfo.name; |
| allFieldNames.add(name); |
| if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { |
| indexedFieldNames.add(name); |
| } else { |
| notIndexedFieldNames.add(name); |
| } |
| if (fieldInfo.hasVectors()) { |
| tvFieldNames.add(name); |
| } |
| } |
| |
| assertTrue(allFieldNames.contains("keyword")); |
| assertTrue(allFieldNames.contains("text")); |
| assertTrue(allFieldNames.contains("unindexed")); |
| assertTrue(allFieldNames.contains("unstored")); |
| assertTrue(allFieldNames.contains("keyword2")); |
| assertTrue(allFieldNames.contains("text2")); |
| assertTrue(allFieldNames.contains("unindexed2")); |
| assertTrue(allFieldNames.contains("unstored2")); |
| assertTrue(allFieldNames.contains("tvnot")); |
| assertTrue(allFieldNames.contains("termvector")); |
| assertTrue(allFieldNames.contains("tvposition")); |
| assertTrue(allFieldNames.contains("tvoffset")); |
| assertTrue(allFieldNames.contains("tvpositionoffset")); |
| |
| // verify that only indexed fields were returned |
| assertEquals(11, indexedFieldNames.size()); // 6 original + the 5 termvector fields |
| assertTrue(indexedFieldNames.contains("keyword")); |
| assertTrue(indexedFieldNames.contains("text")); |
| assertTrue(indexedFieldNames.contains("unstored")); |
| assertTrue(indexedFieldNames.contains("keyword2")); |
| assertTrue(indexedFieldNames.contains("text2")); |
| assertTrue(indexedFieldNames.contains("unstored2")); |
| assertTrue(indexedFieldNames.contains("tvnot")); |
| assertTrue(indexedFieldNames.contains("termvector")); |
| assertTrue(indexedFieldNames.contains("tvposition")); |
| assertTrue(indexedFieldNames.contains("tvoffset")); |
| assertTrue(indexedFieldNames.contains("tvpositionoffset")); |
| |
| // verify that only unindexed fields were returned |
| assertEquals(2, notIndexedFieldNames.size()); // the following fields |
| assertTrue(notIndexedFieldNames.contains("unindexed")); |
| assertTrue(notIndexedFieldNames.contains("unindexed2")); |
| |
| // verify index term vector fields |
    assertEquals(tvFieldNames.toString(), 4, tvFieldNames.size()); // 4 fields store term vectors
| assertTrue(tvFieldNames.contains("termvector")); |
| |
| reader.close(); |
| d.close(); |
| } |
| |
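  /** Smoke test: indexing documents with every combination of term vector options must succeed. */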
| public void testTermVectors() throws Exception { |
| Directory d = newDirectory(); |
| // set up writer |
| IndexWriter writer = new IndexWriter( |
| d, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(newLogMergePolicy()) |
| ); |
| // want to get some more segments here |
| // new termvector fields |
| int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); |
| FieldType customType5 = new FieldType(TextField.TYPE_STORED); |
| customType5.setStoreTermVectors(true); |
| FieldType customType6 = new FieldType(TextField.TYPE_STORED); |
| customType6.setStoreTermVectors(true); |
| customType6.setStoreTermVectorOffsets(true); |
| FieldType customType7 = new FieldType(TextField.TYPE_STORED); |
| customType7.setStoreTermVectors(true); |
| customType7.setStoreTermVectorPositions(true); |
| FieldType customType8 = new FieldType(TextField.TYPE_STORED); |
| customType8.setStoreTermVectors(true); |
| customType8.setStoreTermVectorOffsets(true); |
| customType8.setStoreTermVectorPositions(true); |
| for (int i = 0; i < 5 * mergeFactor; i++) { |
| Document doc = new Document(); |
| doc.add(new TextField("tvnot", "one two two three three three", Field.Store.YES)); |
| doc.add(new Field("termvector", "one two two three three three", customType5)); |
| doc.add(new Field("tvoffset", "one two two three three three", customType6)); |
| doc.add(new Field("tvposition", "one two two three three three", customType7)); |
| doc.add(new Field("tvpositionoffset", "one two two three three three", customType8)); |
| |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| d.close(); |
| } |
| |
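  /** Asserts that the given term occurs in exactly {@code expected} documents of the reader. */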
| void assertTermDocsCount(String msg, |
| IndexReader reader, |
| Term term, |
| int expected) |
| throws IOException { |
| PostingsEnum tdocs = TestUtil.docs(random(), reader, |
| term.field(), |
| new BytesRef(term.text()), |
| null, |
| 0); |
| int count = 0; |
| if (tdocs != null) { |
      while (tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
| count++; |
| } |
| } |
| assertEquals(msg + ", count mismatch", expected, count); |
| } |
| |
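  /** Verifies that a stored binary field round-trips unchanged, both before and after a forced merge. */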
| public void testBinaryFields() throws IOException { |
| Directory dir = newDirectory(); |
| byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; |
| |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(newLogMergePolicy())); |
| |
| for (int i = 0; i < 10; i++) { |
| addDoc(writer, "document number " + (i + 1)); |
| addDocumentWithFields(writer); |
| addDocumentWithDifferentFields(writer); |
| addDocumentWithTermVectorFields(writer); |
| } |
| writer.close(); |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND) |
| .setMergePolicy(newLogMergePolicy())); |
| Document doc = new Document(); |
| doc.add(new StoredField("bin1", bin)); |
| doc.add(new TextField("junk", "junk text", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.close(); |
| DirectoryReader reader = DirectoryReader.open(dir); |
| Document doc2 = reader.document(reader.maxDoc() - 1); |
| IndexableField[] fields = doc2.getFields("bin1"); |
| assertNotNull(fields); |
| assertEquals(1, fields.length); |
| IndexableField b1 = fields[0]; |
    assertNotNull(b1.binaryValue());
| BytesRef bytesRef = b1.binaryValue(); |
| assertEquals(bin.length, bytesRef.length); |
| for (int i = 0; i < bin.length; i++) { |
| assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); |
| } |
| reader.close(); |
    // force merge
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND) |
| .setMergePolicy(newLogMergePolicy())); |
| writer.forceMerge(1); |
| writer.close(); |
| reader = DirectoryReader.open(dir); |
| doc2 = reader.document(reader.maxDoc() - 1); |
| fields = doc2.getFields("bin1"); |
| assertNotNull(fields); |
| assertEquals(1, fields.length); |
| b1 = fields[0]; |
    assertNotNull(b1.binaryValue());
| bytesRef = b1.binaryValue(); |
| assertEquals(bin.length, bytesRef.length); |
| for (int i = 0; i < bin.length; i++) { |
| assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); |
| } |
| reader.close(); |
| dir.close(); |
| } |
| |
| |
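  /** Verifies that writer and reader release all file handles: the index directory must be removable after each close. */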
| public void testFilesOpenClose() throws IOException { |
| // Create initial data set |
| Path dirFile = createTempDir("TestIndexReader.testFilesOpenClose"); |
| Directory dir = newFSDirectory(dirFile); |
| |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| addDoc(writer, "test"); |
| writer.close(); |
| dir.close(); |
| |
| // Try to erase the data - this ensures that the writer closed all files |
| IOUtils.rm(dirFile); |
| dir = newFSDirectory(dirFile); |
| |
| // Now create the data set again, just as before |
| writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.CREATE)); |
| addDoc(writer, "test"); |
| writer.close(); |
| dir.close(); |
| |
| // Now open existing directory and test that reader closes all files |
| dir = newFSDirectory(dirFile); |
| DirectoryReader reader1 = DirectoryReader.open(dir); |
| reader1.close(); |
| dir.close(); |
| |
| // The following will fail if reader did not close |
| // all files |
| IOUtils.rm(dirFile); |
| } |
| |
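  /** Opening a reader on a missing or deleted index must throw FileNotFoundException or NoSuchFileException. */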
| public void testOpenReaderAfterDelete() throws IOException { |
| Path dirFile = createTempDir("deletetest"); |
| Directory dir = newFSDirectory(dirFile); |
| if (dir instanceof BaseDirectoryWrapper) { |
| ((BaseDirectoryWrapper)dir).setCheckIndexOnClose(false); // we will hit NoSuchFileException in MDW since we nuked it! |
| } |
| expectThrowsAnyOf(Arrays.asList(FileNotFoundException.class, NoSuchFileException.class), |
| () -> DirectoryReader.open(dir) |
| ); |
| |
| IOUtils.rm(dirFile); |
| |
    // Make sure we still get a FileNotFoundException/NoSuchFileException (not an NPE):
| expectThrowsAnyOf(Arrays.asList(FileNotFoundException.class, NoSuchFileException.class), |
| () -> DirectoryReader.open(dir) |
| ); |
| |
| dir.close(); |
| } |
| |
  static void addDocumentWithFields(IndexWriter writer) throws IOException {
| Document doc = new Document(); |
| |
| FieldType customType3 = new FieldType(); |
| customType3.setStored(true); |
| doc.add(newStringField("keyword", "test1", Field.Store.YES)); |
| doc.add(newTextField("text", "test1", Field.Store.YES)); |
| doc.add(newField("unindexed", "test1", customType3)); |
| doc.add(new TextField("unstored","test1", Field.Store.NO)); |
| writer.addDocument(doc); |
| } |
| |
  static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException {
| Document doc = new Document(); |
| |
| FieldType customType3 = new FieldType(); |
| customType3.setStored(true); |
| doc.add(newStringField("keyword2", "test1", Field.Store.YES)); |
| doc.add(newTextField("text2", "test1", Field.Store.YES)); |
| doc.add(newField("unindexed2", "test1", customType3)); |
| doc.add(new TextField("unstored2","test1", Field.Store.NO)); |
| writer.addDocument(doc); |
| } |
| |
  static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException {
| Document doc = new Document(); |
| FieldType customType5 = new FieldType(TextField.TYPE_STORED); |
| customType5.setStoreTermVectors(true); |
| FieldType customType6 = new FieldType(TextField.TYPE_STORED); |
| customType6.setStoreTermVectors(true); |
| customType6.setStoreTermVectorOffsets(true); |
| FieldType customType7 = new FieldType(TextField.TYPE_STORED); |
| customType7.setStoreTermVectors(true); |
| customType7.setStoreTermVectorPositions(true); |
| FieldType customType8 = new FieldType(TextField.TYPE_STORED); |
| customType8.setStoreTermVectors(true); |
| customType8.setStoreTermVectorOffsets(true); |
| customType8.setStoreTermVectorPositions(true); |
| doc.add(newTextField("tvnot", "tvnot", Field.Store.YES)); |
| doc.add(newField("termvector","termvector",customType5)); |
| doc.add(newField("tvoffset","tvoffset", customType6)); |
| doc.add(newField("tvposition","tvposition", customType7)); |
| doc.add(newField("tvpositionoffset","tvpositionoffset", customType8)); |
| |
| writer.addDocument(doc); |
| } |
| |
| static void addDoc(IndexWriter writer, String value) throws IOException { |
| Document doc = new Document(); |
| doc.add(newTextField("content", value, Field.Store.NO)); |
| writer.addDocument(doc); |
| } |
| |
| // TODO: maybe this can reuse the logic of test dueling codecs? |
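  /** Asserts that two readers see identical indexes: doc counts, field names, norms, deletions, stored fields, and postings. */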
| public static void assertIndexEquals(DirectoryReader index1, DirectoryReader index2) throws IOException { |
| assertEquals("IndexReaders have different values for numDocs.", index1.numDocs(), index2.numDocs()); |
| assertEquals("IndexReaders have different values for maxDoc.", index1.maxDoc(), index2.maxDoc()); |
| assertEquals("Only one IndexReader has deletions.", index1.hasDeletions(), index2.hasDeletions()); |
| assertEquals("Single segment test differs.", index1.leaves().size() == 1, index2.leaves().size() == 1); |
| |
| // check field names |
| FieldInfos fieldInfos1 = FieldInfos.getMergedFieldInfos(index1); |
| FieldInfos fieldInfos2 = FieldInfos.getMergedFieldInfos(index2); |
| assertEquals("IndexReaders have different numbers of fields.", fieldInfos1.size(), fieldInfos2.size()); |
| final int numFields = fieldInfos1.size(); |
    for (int fieldID = 0; fieldID < numFields; fieldID++) {
| final FieldInfo fieldInfo1 = fieldInfos1.fieldInfo(fieldID); |
| final FieldInfo fieldInfo2 = fieldInfos2.fieldInfo(fieldID); |
| assertEquals("Different field names.", fieldInfo1.name, fieldInfo2.name); |
| } |
| |
| // check norms |
    for (FieldInfo fieldInfo : fieldInfos1) {
| String curField = fieldInfo.name; |
| NumericDocValues norms1 = MultiDocValues.getNormValues(index1, curField); |
| NumericDocValues norms2 = MultiDocValues.getNormValues(index2, curField); |
| if (norms1 != null && norms2 != null) { |
| // todo: generalize this (like TestDuelingCodecs assert) |
| while (true) { |
| int docID = norms1.nextDoc(); |
| assertEquals(docID, norms2.nextDoc()); |
| if (docID == NO_MORE_DOCS) { |
| break; |
| } |
| assertEquals("Norm different for doc " + docID + " and field '" + curField + "'.", norms1.longValue(), norms2.longValue()); |
| } |
| } else { |
| assertNull(norms1); |
| assertNull(norms2); |
| } |
| } |
| |
| // check deletions |
| final Bits liveDocs1 = MultiBits.getLiveDocs(index1); |
| final Bits liveDocs2 = MultiBits.getLiveDocs(index2); |
| for (int i = 0; i < index1.maxDoc(); i++) { |
| assertEquals("Doc " + i + " only deleted in one index.", |
| liveDocs1 == null || !liveDocs1.get(i), |
| liveDocs2 == null || !liveDocs2.get(i)); |
| } |
| |
| // check stored fields |
| for (int i = 0; i < index1.maxDoc(); i++) { |
| if (liveDocs1 == null || liveDocs1.get(i)) { |
| Document doc1 = index1.document(i); |
| Document doc2 = index2.document(i); |
| List<IndexableField> field1 = doc1.getFields(); |
| List<IndexableField> field2 = doc2.getFields(); |
| assertEquals("Different numbers of fields for doc " + i + ".", field1.size(), field2.size()); |
| Iterator<IndexableField> itField1 = field1.iterator(); |
| Iterator<IndexableField> itField2 = field2.iterator(); |
| while (itField1.hasNext()) { |
| Field curField1 = (Field) itField1.next(); |
| Field curField2 = (Field) itField2.next(); |
| assertEquals("Different fields names for doc " + i + ".", curField1.name(), curField2.name()); |
| assertEquals("Different field values for doc " + i + ".", curField1.stringValue(), curField2.stringValue()); |
| } |
| } |
| } |
| |
| // check dictionary and posting lists |
| TreeSet<String> fields1 = new TreeSet<>(FieldInfos.getIndexedFields(index1)); |
| TreeSet<String> fields2 = new TreeSet<>(FieldInfos.getIndexedFields(index2)); |
| Iterator<String> fenum2 = fields2.iterator(); |
| for (String field1 : fields1) { |
| assertEquals("Different fields", field1, fenum2.next()); |
| Terms terms1 = MultiTerms.getTerms(index1, field1); |
| if (terms1 == null) { |
| assertNull(MultiTerms.getTerms(index2, field1)); |
| continue; |
| } |
| TermsEnum enum1 = terms1.iterator(); |
| |
| Terms terms2 = MultiTerms.getTerms(index2, field1); |
| assertNotNull(terms2); |
| TermsEnum enum2 = terms2.iterator(); |
| |
      while (enum1.next() != null) {
| assertEquals("Different terms", enum1.term(), enum2.next()); |
| PostingsEnum tp1 = enum1.postings(null, PostingsEnum.ALL); |
| PostingsEnum tp2 = enum2.postings(null, PostingsEnum.ALL); |
| |
        while (tp1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          assertTrue(tp2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          assertEquals("Different doc id in postinglist of term " + enum1.term() + ".", tp1.docID(), tp2.docID());
          assertEquals("Different term frequency in postinglist of term " + enum1.term() + ".", tp1.freq(), tp2.freq());
| for (int i = 0; i < tp1.freq(); i++) { |
| assertEquals("Different positions in postinglist of term " + enum1.term() + ".", tp1.nextPosition(), tp2.nextPosition()); |
| } |
| } |
| } |
| } |
| assertFalse(fenum2.hasNext()); |
| } |
| |
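  /** Verifies that getIndexCommit() matches the latest commit and tracks changes and force-merges of the index. */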
| public void testGetIndexCommit() throws IOException { |
| |
| Directory d = newDirectory(); |
| |
| // set up writer |
| IndexWriter writer = new IndexWriter( |
| d, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2) |
| .setMergePolicy(newLogMergePolicy(10)) |
| ); |
    for (int i = 0; i < 27; i++) {
      addDocumentWithFields(writer);
    }
| writer.close(); |
| |
| SegmentInfos sis = SegmentInfos.readLatestCommit(d); |
| DirectoryReader r = DirectoryReader.open(d); |
| IndexCommit c = r.getIndexCommit(); |
| |
| assertEquals(sis.getSegmentsFileName(), c.getSegmentsFileName()); |
| |
    assertEquals(c, r.getIndexCommit());
| |
| // Change the index |
| writer = new IndexWriter( |
| d, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND) |
| .setMaxBufferedDocs(2) |
| .setMergePolicy(newLogMergePolicy(10)) |
| ); |
    for (int i = 0; i < 7; i++) {
      addDocumentWithFields(writer);
    }
| writer.close(); |
| |
| DirectoryReader r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| assertFalse(c.equals(r2.getIndexCommit())); |
| assertFalse(r2.getIndexCommit().getSegmentCount() == 1); |
| r2.close(); |
| |
| writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setOpenMode(OpenMode.APPEND)); |
| writer.forceMerge(1); |
| writer.close(); |
| |
| r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| assertNull(DirectoryReader.openIfChanged(r2)); |
| assertEquals(1, r2.getIndexCommit().getSegmentCount()); |
| |
| r.close(); |
| r2.close(); |
| d.close(); |
| } |
| |
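  /** Creates a document with a single stored, untokenized, norms-omitted {@code id} field. */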
| static Document createDocument(String id) { |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.setTokenized(false); |
| customType.setOmitNorms(true); |
| |
| doc.add(newField("id", id, customType)); |
| return doc; |
| } |
| |
| // LUCENE-1468 -- make sure on attempting to open an |
| // DirectoryReader on a non-existent directory, you get a |
| // good exception |
| public void testNoDir() throws Throwable { |
| Path tempDir = createTempDir("doesnotexist"); |
| Directory dir = newFSDirectory(tempDir); |
| expectThrows(IndexNotFoundException.class, () -> { |
| DirectoryReader.open(dir); |
| }); |
| dir.close(); |
| } |
| |
| // LUCENE-1509 |
| public void testNoDupCommitFileNames() throws Throwable { |
| |
| Directory dir = newDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMaxBufferedDocs(2)); |
| writer.addDocument(createDocument("a")); |
| writer.addDocument(createDocument("a")); |
| writer.addDocument(createDocument("a")); |
| writer.close(); |
| |
| Collection<IndexCommit> commits = DirectoryReader.listCommits(dir); |
| for (final IndexCommit commit : commits) { |
| Collection<String> files = commit.getFileNames(); |
| HashSet<String> seen = new HashSet<>(); |
| for (final String fileName : files) { |
| assertTrue("file " + fileName + " was duplicated", !seen.contains(fileName)); |
| seen.add(fileName); |
| } |
| } |
| |
| dir.close(); |
| } |
| |
| // LUCENE-1586: getUniqueTermCount |
| public void testUniqueTermCount() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document doc = new Document(); |
| doc.add(newTextField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO)); |
| doc.add(newTextField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO)); |
| writer.addDocument(doc); |
| writer.addDocument(doc); |
| writer.commit(); |
| |
| DirectoryReader r = DirectoryReader.open(dir); |
| LeafReader r1 = getOnlyLeafReader(r); |
| assertEquals(26, r1.terms("field").size()); |
| assertEquals(10, r1.terms("number").size()); |
| writer.addDocument(doc); |
| writer.commit(); |
| DirectoryReader r2 = DirectoryReader.openIfChanged(r); |
| assertNotNull(r2); |
| r.close(); |
| |
| for(LeafReaderContext s : r2.leaves()) { |
| assertEquals(26, s.reader().terms("field").size()); |
| assertEquals(10, s.reader().terms("number").size()); |
| } |
| r2.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-2046 |
| public void testPrepareCommitIsCurrent() throws Throwable { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.commit(); |
| Document doc = new Document(); |
| writer.addDocument(doc); |
| DirectoryReader r = DirectoryReader.open(dir); |
| assertTrue(r.isCurrent()); |
| writer.addDocument(doc); |
| writer.prepareCommit(); |
| assertTrue(r.isCurrent()); |
| DirectoryReader r2 = DirectoryReader.openIfChanged(r); |
| assertNull(r2); |
| writer.commit(); |
| assertFalse(r.isCurrent()); |
| writer.close(); |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-2753 |
| public void testListCommits() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(null) |
| .setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()))); |
| SnapshotDeletionPolicy sdp = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy(); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| sdp.snapshot(); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| sdp.snapshot(); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| sdp.snapshot(); |
| writer.close(); |
| long currentGen = 0; |
| for (IndexCommit ic : DirectoryReader.listCommits(dir)) { |
| assertTrue("currentGen=" + currentGen + " commitGen=" + ic.getGeneration(), currentGen < ic.getGeneration()); |
| currentGen = ic.getGeneration(); |
| } |
| dir.close(); |
| } |
| |
| // Make sure totalTermFreq works correctly in the terms |
| // dict cache |
| public void testTotalTermFreqCached() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document d = new Document(); |
| d.add(newTextField("f", "a a b", Field.Store.NO)); |
| writer.addDocument(d); |
| DirectoryReader r = writer.getReader(); |
| writer.close(); |
| try { |
      // Make sure codec impls totalTermFreq (e.g. PreFlex doesn't)
| Assume.assumeTrue(r.totalTermFreq(new Term("f", new BytesRef("b"))) != -1); |
| assertEquals(1, r.totalTermFreq(new Term("f", new BytesRef("b")))); |
| assertEquals(2, r.totalTermFreq(new Term("f", new BytesRef("a")))); |
| assertEquals(1, r.totalTermFreq(new Term("f", new BytesRef("b")))); |
| } finally { |
| r.close(); |
| dir.close(); |
| } |
| } |
| |
| public void testGetSumDocFreq() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document d = new Document(); |
| d.add(newTextField("f", "a", Field.Store.NO)); |
| writer.addDocument(d); |
| d = new Document(); |
| d.add(newTextField("f", "b", Field.Store.NO)); |
| writer.addDocument(d); |
| DirectoryReader r = writer.getReader(); |
| writer.close(); |
| try { |
      // Make sure codec impls getSumDocFreq (e.g. PreFlex doesn't)
| Assume.assumeTrue(r.getSumDocFreq("f") != -1); |
| assertEquals(2, r.getSumDocFreq("f")); |
| } finally { |
| r.close(); |
| dir.close(); |
| } |
| } |
| |
| public void testGetDocCount() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document d = new Document(); |
| d.add(newTextField("f", "a", Field.Store.NO)); |
| writer.addDocument(d); |
| d = new Document(); |
| d.add(newTextField("f", "a", Field.Store.NO)); |
| writer.addDocument(d); |
| DirectoryReader r = writer.getReader(); |
| writer.close(); |
| try { |
      // Make sure codec impls getDocCount (e.g. PreFlex doesn't)
| Assume.assumeTrue(r.getDocCount("f") != -1); |
| assertEquals(2, r.getDocCount("f")); |
| } finally { |
| r.close(); |
| dir.close(); |
| } |
| } |
| |
| public void testGetSumTotalTermFreq() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| Document d = new Document(); |
| d.add(newTextField("f", "a b b", Field.Store.NO)); |
| writer.addDocument(d); |
| d = new Document(); |
| d.add(newTextField("f", "a a b", Field.Store.NO)); |
| writer.addDocument(d); |
| DirectoryReader r = writer.getReader(); |
| writer.close(); |
| try { |
      // Make sure codec impls getSumTotalTermFreq (e.g. PreFlex doesn't)
| Assume.assumeTrue(r.getSumTotalTermFreq("f") != -1); |
| assertEquals(6, r.getSumTotalTermFreq("f")); |
| } finally { |
| r.close(); |
| dir.close(); |
| } |
| } |
| |
| // LUCENE-2474 |
| public void testReaderFinishedListener() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(newLogMergePolicy())); |
| ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(3); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| final DirectoryReader reader = writer.getReader(); |
| final int[] closeCount = new int[1]; |
    final IndexReader.ClosedListener listener = key -> closeCount[0]++;
| |
| reader.getReaderCacheHelper().addClosedListener(listener); |
| |
| reader.close(); |
| |
    // Closing the top-level reader should fire the listener exactly once
    assertEquals(1, closeCount[0]);
| writer.close(); |
| |
| DirectoryReader reader2 = DirectoryReader.open(dir); |
| reader2.getReaderCacheHelper().addClosedListener(listener); |
| |
| closeCount[0] = 0; |
| reader2.close(); |
| assertEquals(1, closeCount[0]); |
| dir.close(); |
| } |
| |
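  /** document() with an out-of-range docID must throw IllegalArgumentException. */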
| public void testOOBDocID() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.addDocument(new Document()); |
| DirectoryReader r = writer.getReader(); |
| writer.close(); |
| r.document(0); |
| expectThrows(IllegalArgumentException.class, () -> { |
| r.document(1); |
| }); |
| r.close(); |
| dir.close(); |
| } |
| |
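  /** tryIncRef() must succeed on an open reader and fail once the reader has been closed. */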
| public void testTryIncRef() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| DirectoryReader r = DirectoryReader.open(dir); |
| assertTrue(r.tryIncRef()); |
| r.decRef(); |
| r.close(); |
| assertFalse(r.tryIncRef()); |
| writer.close(); |
| dir.close(); |
| } |
| |
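  /** Hammers tryIncRef()/decRef() from several threads while the reader is concurrently closed. */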
| public void testStressTryIncRef() throws IOException, InterruptedException { |
| Directory dir = newDirectory(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); |
| writer.addDocument(new Document()); |
| writer.commit(); |
| DirectoryReader r = DirectoryReader.open(dir); |
| int numThreads = atLeast(2); |
| |
| IncThread[] threads = new IncThread[numThreads]; |
| for (int i = 0; i < threads.length; i++) { |
| threads[i] = new IncThread(r, random()); |
| threads[i].start(); |
| } |
| Thread.sleep(100); |
| |
| assertTrue(r.tryIncRef()); |
| r.decRef(); |
| r.close(); |
| |
| for (int i = 0; i < threads.length; i++) { |
| threads[i].join(); |
| assertNull(threads[i].failed); |
| } |
| assertFalse(r.tryIncRef()); |
| writer.close(); |
| dir.close(); |
| } |
| |
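  /** Thread that repeatedly acquires and releases a reference on a reader until tryIncRef() fails. */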
| static class IncThread extends Thread { |
| final IndexReader toInc; |
| final Random random; |
| Throwable failed; |
| |
| IncThread(IndexReader toInc, Random random) { |
| this.toInc = toInc; |
| this.random = random; |
| } |
| |
| @Override |
| public void run() { |
| try { |
| while (toInc.tryIncRef()) { |
| assertFalse(toInc.hasDeletions()); |
| toInc.decRef(); |
| } |
| assertFalse(toInc.tryIncRef()); |
| } catch (Throwable e) { |
| failed = e; |
| } |
| } |
| } |
| |
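  /** Verifies that document(docID, fieldsToLoad) loads only the requested stored fields. */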
| public void testLoadCertainFields() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| Document doc = new Document(); |
| doc.add(newStringField("field1", "foobar", Field.Store.YES)); |
| doc.add(newStringField("field2", "foobaz", Field.Store.YES)); |
| writer.addDocument(doc); |
| DirectoryReader r = writer.getReader(); |
| writer.close(); |
| Set<String> fieldsToLoad = new HashSet<>(); |
| assertEquals(0, r.document(0, fieldsToLoad).getFields().size()); |
| fieldsToLoad.add("field1"); |
| Document doc2 = r.document(0, fieldsToLoad); |
| assertEquals(1, doc2.getFields().size()); |
| assertEquals("foobar", doc2.get("field1")); |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testIndexExistsOnNonExistentDirectory() throws Exception { |
| Path tempDir = createTempDir("testIndexExistsOnNonExistentDirectory"); |
| Directory dir = newFSDirectory(tempDir); |
| assertFalse(DirectoryReader.indexExists(dir)); |
| dir.close(); |
| } |
| } |