diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 1bc2f40..9ff3cdb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -17,7 +17,6 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
@@ -397,6 +396,9 @@ public final class CodecUtil {
* @throws IOException if the footer is invalid
*/
public static long retrieveChecksum(IndexInput in) throws IOException {
+ if (in.length() < footerLength()) {
+ throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), in);
+ }
in.seek(in.length() - footerLength());
validateFooter(in);
return readCRC(in);
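
For context: a Lucene codec footer is a fixed 16 bytes (footerLength() covers FOOTER_MAGIC as an int, the algorithm ID as an int, and the CRC as a long), so any file shorter than that cannot end in a valid footer, and the unguarded seek would compute a negative offset. A minimal sketch of the behavior this hunk adds; the helper name is invented for illustration:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.IndexInput;

// Illustrative helper (name made up); mirrors the guard this hunk places in
// front of the footer seek inside retrieveChecksum.
static long safeRetrieveChecksum(IndexInput in) throws IOException {
  if (in.length() < CodecUtil.footerLength()) {   // footerLength() == 16
    // Without the guard, in.seek(in.length() - 16) on e.g. a 10-byte file
    // seeks to -6 and surfaces an unrelated exception instead of this one:
    throw new CorruptIndexException("misplaced codec footer (file truncated?): length="
        + in.length() + " but footerLength==" + CodecUtil.footerLength(), in);
  }
  return CodecUtil.retrieveChecksum(in);  // seeks to the footer, validates it, reads the CRC
}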
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
index 57df8aa..b1cfc5d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
@@ -68,6 +68,13 @@ final class Lucene50CompoundReader extends Directory {
String entriesFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.ENTRIES_EXTENSION);
this.entries = readEntries(si.getId(), directory, entriesFileName);
boolean success = false;
+
+ long expectedLength = CodecUtil.indexHeaderLength(Lucene50CompoundFormat.DATA_CODEC, "");
+ for(Map.Entry<String,FileEntry> ent : entries.entrySet()) {
+ expectedLength += ent.getValue().length;
+ }
+ expectedLength += CodecUtil.footerLength();
+
handle = directory.openInput(dataFileName, context);
try {
CodecUtil.checkIndexHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
@@ -77,6 +84,13 @@ final class Lucene50CompoundReader extends Directory {
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
CodecUtil.retrieveChecksum(handle);
+
+ // We also validate length, because e.g. if you strip 16 bytes off the .cfs we otherwise
+ // would not detect it:
+ if (handle.length() != expectedLength) {
+ throw new CorruptIndexException("length should be " + expectedLength + " bytes, but is " + handle.length() + " instead", handle);
+ }
+
success = true;
} finally {
if (!success) {
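
The length check above matters because every sub-file copied into the .cfs itself ends with a 16-byte codec footer: truncate the .cfs by exactly footerLength() bytes and the last embedded footer lines up where the compound file's own footer should be, so retrieveChecksum's structural check still passes. A sketch of the invariant being enforced, with made-up sub-file sizes and names:

// Sketch only: a .cfs is exactly header + concatenated sub-files + footer.
// The sizes (1000, 250) and file names below are invented for illustration.
static long expectedCfsLength() {
  long expected = CodecUtil.indexHeaderLength(Lucene50CompoundFormat.DATA_CODEC, "");
  expected += 1000;                       // first embedded sub-file, e.g. _1.fdt
  expected += 250;                        // second embedded sub-file, e.g. _1.fdx
  expected += CodecUtil.footerLength();   // always 16 bytes
  // Any other handle.length() now raises CorruptIndexException, even when the
  // trailing bytes still parse as a structurally valid footer.
  return expected;
}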
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointFormat.java
index 61ce8fb..beec4bd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointFormat.java
@@ -73,7 +73,8 @@ import org.apache.lucene.index.SegmentWriteState;
public final class Lucene60PointFormat extends PointFormat {
- static final String CODEC_NAME = "Lucene60PointFormat";
+ static final String DATA_CODEC_NAME = "Lucene60PointFormatData";
+ static final String META_CODEC_NAME = "Lucene60PointFormatMeta";
/**
* Filename extension for the leaf blocks
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointReader.java
index 2e2bddb..2d18019 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointReader.java
@@ -1,6 +1,5 @@
package org.apache.lucene.codecs.lucene60;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -48,38 +47,69 @@ public class Lucene60PointReader extends PointReader implements Closeable {
/** Sole constructor */
public Lucene60PointReader(SegmentReadState readState) throws IOException {
this.readState = readState;
- String dataFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
- readState.segmentSuffix,
- Lucene60PointFormat.DATA_EXTENSION);
- dataIn = readState.directory.openInput(dataFileName, readState.context);
+
+
String indexFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
readState.segmentSuffix,
Lucene60PointFormat.INDEX_EXTENSION);
- boolean success = false;
+ Map<Integer,Long> fieldToFileOffset = new HashMap<>();
// Read index file
try (ChecksumIndexInput indexIn = readState.directory.openChecksumInput(indexFileName, readState.context)) {
- CodecUtil.checkIndexHeader(indexIn,
- Lucene60PointFormat.CODEC_NAME,
- Lucene60PointFormat.INDEX_VERSION_START,
- Lucene60PointFormat.INDEX_VERSION_START,
+ Throwable priorE = null;
+ try {
+ CodecUtil.checkIndexHeader(indexIn,
+ Lucene60PointFormat.META_CODEC_NAME,
+ Lucene60PointFormat.INDEX_VERSION_START,
+ Lucene60PointFormat.INDEX_VERSION_START,
+ readState.segmentInfo.getId(),
+ readState.segmentSuffix);
+ int count = indexIn.readVInt();
+ for(int i=0;i<count;i++) {
+ int fieldNumber = indexIn.readVInt();
+ long fp = indexIn.readVLong();
+ fieldToFileOffset.put(fieldNumber, fp);
+ }
+ } catch (Throwable t) {
+ priorE = t;
+ } finally {
+ CodecUtil.checkFooter(indexIn, priorE);
+ }
+ }
+
+ String dataFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
+ readState.segmentSuffix,
+ Lucene60PointFormat.DATA_EXTENSION);
+ boolean success = false;
+ dataIn = readState.directory.openInput(dataFileName, readState.context);
+ try {
+
+ CodecUtil.checkIndexHeader(dataIn,
+ Lucene60PointFormat.DATA_CODEC_NAME,
+ Lucene60PointFormat.DATA_VERSION_START,
+ Lucene60PointFormat.DATA_VERSION_START,
readState.segmentInfo.getId(),
readState.segmentSuffix);
- int count = indexIn.readVInt();
- for(int i=0;i<count;i++) {
- int fieldNumber = indexIn.readVInt();
- long fp = indexIn.readVLong();
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(dataIn);
+
+ for(Map.Entry<Integer,Long> ent : fieldToFileOffset.entrySet()) {
+ int fieldNumber = ent.getKey();
+ long fp = ent.getValue();
dataIn.seek(fp);
BKDReader reader = new BKDReader(dataIn);
readers.put(fieldNumber, reader);
- //reader.verify(readState.segmentInfo.maxDoc());
}
- CodecUtil.checkFooter(indexIn);
+
success = true;
} finally {
if (success == false) {
- IOUtils.closeWhileHandlingException(dataIn);
+ IOUtils.closeWhileHandlingException(this);
}
}
}
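
The .dii metadata read above uses Lucene's standard checksum idiom: remember any exception thrown while parsing, then always call checkFooter, so that when the real problem is a corrupt file the checksum failure is reported rather than whatever confusing parse error the bad bytes happened to cause. A condensed sketch of the idiom; dir, fileName, context and the parse body are placeholders:

try (ChecksumIndexInput in = dir.openChecksumInput(fileName, context)) {
  Throwable priorE = null;
  try {
    // ... read and parse the entire file here ...
  } catch (Throwable t) {
    priorE = t;   // remember, but do not rethrow yet
  } finally {
    // With priorE == null this verifies the CRC and throws CorruptIndexException
    // on mismatch; with priorE != null it rethrows priorE, attaching any checksum
    // failure as a suppressed exception so corruption is still visible.
    CodecUtil.checkFooter(in, priorE);
  }
}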
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointWriter.java
index 318d665..e7104b6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointWriter.java
@@ -27,12 +27,12 @@ import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PointReader;
import org.apache.lucene.codecs.PointWriter;
-import org.apache.lucene.index.PointValues.IntersectVisitor;
-import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
@@ -62,7 +62,7 @@ public class Lucene60PointWriter extends PointWriter implements Closeable {
boolean success = false;
try {
CodecUtil.writeIndexHeader(dataOut,
- Lucene60PointFormat.CODEC_NAME,
+ Lucene60PointFormat.DATA_CODEC_NAME,
Lucene60PointFormat.DATA_VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
@@ -184,7 +184,7 @@ public class Lucene60PointWriter extends PointWriter implements Closeable {
// Write index file
try (IndexOutput indexOut = writeState.directory.createOutput(indexFileName, writeState.context)) {
CodecUtil.writeIndexHeader(indexOut,
- Lucene60PointFormat.CODEC_NAME,
+ Lucene60PointFormat.META_CODEC_NAME,
Lucene60PointFormat.INDEX_VERSION_CURRENT,
writeState.segmentInfo.getId(),
writeState.segmentSuffix);
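
Splitting the single CODEC_NAME into data/meta names is what makes swapped files detectable: checkIndexHeader compares the codec string, version range, segment ID, and suffix stored at the front of the file, so a .dii masquerading as a .dim (or vice versa) now fails at open. A sketch of the reader-side comparison; segmentId and segmentSuffix are placeholders:

// After this patch the two files carry distinct header strings:
//   .dim: "Lucene60PointFormatData"    .dii: "Lucene60PointFormatMeta"
CodecUtil.checkIndexHeader(dataIn,
    Lucene60PointFormat.DATA_CODEC_NAME,      // string mismatch => CorruptIndexException
    Lucene60PointFormat.DATA_VERSION_START,
    Lucene60PointFormat.DATA_VERSION_CURRENT,
    segmentId,                                // also rejects same-named files from another index
    segmentSuffix);
// TestSwappedIndexFiles below exercises exactly this: same file name,
// different index, expected to fail with CorruptIndexException.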
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesCheckIndexHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesCheckIndexHeader.java
new file mode 100644
index 0000000..0c8ebd5
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesCheckIndexHeader.java
@@ -0,0 +1,126 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Collections;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.store.BaseDirectoryWrapper;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Test that a plain default detects broken index headers early (on opening a reader).
+ */
+public class TestAllFilesCheckIndexHeader extends LuceneTestCase {
+ public void test() throws Exception {
+ Directory dir = newDirectory();
+
+ IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+ conf.setCodec(TestUtil.getDefaultCodec());
+
+ // Disable CFS 80% of the time so we can corrupt individual files, but the other 20% of the time we test corruption of .cfs/.cfe too:
+ if (random().nextInt(5) != 1) {
+ conf.setUseCompoundFile(false);
+ conf.getMergePolicy().setNoCFSRatio(0.0);
+ }
+
+ RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
+ // Use LineFileDocs so we (hopefully) get most Lucene features
+ // tested, e.g. IntPoint was recently added to it:
+ LineFileDocs docs = new LineFileDocs(random());
+ for (int i = 0; i < 100; i++) {
+ riw.addDocument(docs.nextDoc());
+ if (random().nextInt(7) == 0) {
+ riw.commit();
+ }
+ if (random().nextInt(20) == 0) {
+ riw.deleteDocuments(new Term("docid", Integer.toString(i)));
+ }
+ if (random().nextInt(15) == 0) {
+ riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
+ }
+ }
+
+ if (TEST_NIGHTLY == false) {
+ riw.forceMerge(1);
+ }
+ riw.close();
+ checkIndexHeader(dir);
+ dir.close();
+ }
+
+ private void checkIndexHeader(Directory dir) throws IOException {
+ for(String name : dir.listAll()) {
+ checkOneFile(dir, name);
+ }
+ }
+
+ private void checkOneFile(Directory dir, String victim) throws IOException {
+ try (BaseDirectoryWrapper dirCopy = newDirectory()) {
+ dirCopy.setCheckIndexOnClose(false);
+ long victimLength = dir.fileLength(victim);
+ int wrongBytes = TestUtil.nextInt(random(), 1, (int) Math.min(100, victimLength));
+ assert victimLength > 0;
+
+ if (VERBOSE) {
+ System.out.println("TEST: now break file " + victim + " by randomizing first " + wrongBytes + " of " + victimLength);
+ }
+
+ for(String name : dir.listAll()) {
+ if (name.equals(victim) == false) {
+ dirCopy.copyFrom(dir, name, name, IOContext.DEFAULT);
+ } else {
+ try(IndexOutput out = dirCopy.createOutput(name, IOContext.DEFAULT);
+ IndexInput in = dir.openInput(name, IOContext.DEFAULT)) {
+ // keeps same file length, but replaces the first wrongBytes with random bytes:
+ byte[] bytes = new byte[wrongBytes];
+ random().nextBytes(bytes);
+ out.writeBytes(bytes, 0, bytes.length);
+ in.seek(wrongBytes);
+ out.copyBytes(in, victimLength - wrongBytes);
+ }
+ }
+ dirCopy.sync(Collections.singleton(name));
+ }
+
+ try {
+ // NOTE: we .close so that if the test fails (corruption not detected) we don't also get all these confusing errors about open files:
+ DirectoryReader.open(dirCopy).close();
+ fail("wrong bytes not detected after randomizing first " + wrongBytes + " bytes out of " + victimLength + " for file " + victim);
+ } catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
+ // expected
+ }
+
+ // CheckIndex should also fail:
+ try {
+ TestUtil.checkIndex(dirCopy, true, true);
+ fail("wrong bytes not detected after randomizing first " + wrongBytes + " bytes out of " + victimLength + " for file " + victim);
+ } catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
+ // expected
+ }
+ }
+ }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java
new file mode 100644
index 0000000..d1d95b4
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectTruncation.java
@@ -0,0 +1,120 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Collections;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.store.BaseDirectoryWrapper;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Test that the default codec detects index file truncation early (on opening a reader).
+ */
+public class TestAllFilesDetectTruncation extends LuceneTestCase {
+ public void test() throws Exception {
+ Directory dir = newDirectory();
+
+ IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+ conf.setCodec(TestUtil.getDefaultCodec());
+
+ // Disable CFS 80% of the time so we can truncate individual files, but the other 20% of the time we test truncation of .cfs/.cfe too:
+ if (random().nextInt(5) != 1) {
+ conf.setUseCompoundFile(false);
+ conf.getMergePolicy().setNoCFSRatio(0.0);
+ }
+
+ RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
+ // Use LineFileDocs so we (hopefully) get most Lucene features
+ // tested, e.g. IntPoint was recently added to it:
+ LineFileDocs docs = new LineFileDocs(random());
+ for (int i = 0; i < 100; i++) {
+ riw.addDocument(docs.nextDoc());
+ if (random().nextInt(7) == 0) {
+ riw.commit();
+ }
+ if (random().nextInt(20) == 0) {
+ riw.deleteDocuments(new Term("docid", Integer.toString(i)));
+ }
+ if (random().nextInt(15) == 0) {
+ riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
+ }
+ }
+ if (TEST_NIGHTLY == false) {
+ riw.forceMerge(1);
+ }
+ riw.close();
+ checkTruncation(dir);
+ dir.close();
+ }
+
+ private void checkTruncation(Directory dir) throws IOException {
+ for(String name : dir.listAll()) {
+ truncateOneFile(dir, name);
+ }
+ }
+
+ private void truncateOneFile(Directory dir, String victim) throws IOException {
+ try (BaseDirectoryWrapper dirCopy = newDirectory()) {
+ dirCopy.setCheckIndexOnClose(false);
+ long victimLength = dir.fileLength(victim);
+ int lostBytes = TestUtil.nextInt(random(), 1, (int) Math.min(100, victimLength));
+ assert victimLength > 0;
+
+ if (VERBOSE) {
+ System.out.println("TEST: now truncate file " + victim + " by removing " + lostBytes + " of " + victimLength + " bytes");
+ }
+
+ for(String name : dir.listAll()) {
+ if (name.equals(victim) == false) {
+ dirCopy.copyFrom(dir, name, name, IOContext.DEFAULT);
+ } else {
+ try(IndexOutput out = dirCopy.createOutput(name, IOContext.DEFAULT);
+ IndexInput in = dir.openInput(name, IOContext.DEFAULT)) {
+ out.copyBytes(in, victimLength - lostBytes);
+ }
+ }
+ dirCopy.sync(Collections.singleton(name));
+ }
+
+ try {
+ // NOTE: we .close so that if the test fails (truncation not detected) we don't also get all these confusing errors about open files:
+ DirectoryReader.open(dirCopy).close();
+ fail("truncation not detected after removing " + lostBytes + " bytes out of " + victimLength + " for file " + victim);
+ } catch (CorruptIndexException | EOFException e) {
+ // expected
+ }
+
+ // CheckIndex should also fail:
+ try {
+ TestUtil.checkIndex(dirCopy, true, true);
+ fail("truncation not detected after removing " + lostBytes + " bytes out of " + victimLength + " for file " + victim);
+ } catch (CorruptIndexException | EOFException e) {
+ // expected
+ }
+ }
+ }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
index 66eb343..710d20f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
@@ -21,11 +21,9 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@@ -38,23 +36,19 @@ public class TestAllFilesHaveChecksumFooter extends LuceneTestCase {
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setCodec(TestUtil.getDefaultCodec());
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
- Document doc = new Document();
- // these fields should sometimes get term vectors, etc
- Field idField = newStringField("id", "", Field.Store.NO);
- Field bodyField = newTextField("body", "", Field.Store.NO);
- Field dvField = new NumericDocValuesField("dv", 5);
- doc.add(idField);
- doc.add(bodyField);
- doc.add(dvField);
+ // Use LineFileDocs so we (hopefully) get most Lucene features
+ // tested, e.g. IntPoint was recently added to it:
+ LineFileDocs docs = new LineFileDocs(random());
for (int i = 0; i < 100; i++) {
- idField.setStringValue(Integer.toString(i));
- bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
- riw.addDocument(doc);
+ riw.addDocument(docs.nextDoc());
if (random().nextInt(7) == 0) {
riw.commit();
}
if (random().nextInt(20) == 0) {
- riw.deleteDocuments(new Term("id", Integer.toString(i)));
+ riw.deleteDocuments(new Term("docid", Integer.toString(i)));
+ }
+ if (random().nextInt(15) == 0) {
+ riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
}
}
riw.close();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
index c2b515d..d726019 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
@@ -23,13 +23,9 @@ import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@@ -43,32 +39,19 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setCodec(TestUtil.getDefaultCodec());
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
- Document doc = new Document();
- Field idField = newStringField("id", "", Field.Store.YES);
- Field bodyField = newTextField("body", "", Field.Store.YES);
- FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
- vectorsType.setStoreTermVectors(true);
- vectorsType.setStoreTermVectorPositions(true);
- Field vectorsField = new Field("vectors", "", vectorsType);
- Field dvField = new NumericDocValuesField("dv", 5);
- doc.add(idField);
- doc.add(bodyField);
- doc.add(vectorsField);
- doc.add(dvField);
+ // Use LineFileDocs so we (hopefully) get most Lucene features
+ // tested, e.g. IntPoint was recently added to it:
+ LineFileDocs docs = new LineFileDocs(random());
for (int i = 0; i < 100; i++) {
- idField.setStringValue(Integer.toString(i));
- bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
- dvField.setLongValue(random().nextInt(5));
- vectorsField.setStringValue(TestUtil.randomUnicodeString(random()));
- riw.addDocument(doc);
+ riw.addDocument(docs.nextDoc());
if (random().nextInt(7) == 0) {
riw.commit();
}
if (random().nextInt(20) == 0) {
- riw.deleteDocuments(new Term("id", Integer.toString(i)));
+ riw.deleteDocuments(new Term("docid", Integer.toString(i)));
}
if (random().nextInt(15) == 0) {
- riw.updateNumericDocValue(new Term("id"), "dv", Long.valueOf(i));
+ riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
}
}
riw.close();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSwappedIndexFiles.java b/lucene/core/src/test/org/apache/lucene/index/TestSwappedIndexFiles.java
new file mode 100644
index 0000000..27cc3cc
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSwappedIndexFiles.java
@@ -0,0 +1,117 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Random;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.BaseDirectoryWrapper;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Test that a file with the same name, but coming from a different index, is detected as foreign.
+ */
+public class TestSwappedIndexFiles extends LuceneTestCase {
+ public void test() throws Exception {
+ Directory dir1 = newDirectory();
+ Directory dir2 = newDirectory();
+
+ // Disable CFS 80% of the time so we can swap individual files, but the other 20% of the time we test swapping of .cfs/.cfe too:
+ boolean useCFS = random().nextInt(5) == 1;
+
+ // Use LineFileDocs so we (hopefully) get most Lucene features
+ // tested, e.g. IntPoint was recently added to it:
+ LineFileDocs docs = new LineFileDocs(random());
+ Document doc = docs.nextDoc();
+ long seed = random().nextLong();
+
+ indexOneDoc(seed, dir1, doc, useCFS);
+ indexOneDoc(seed, dir2, doc, useCFS);
+
+ swapFiles(dir1, dir2);
+ dir1.close();
+ dir2.close();
+ }
+
+ private void indexOneDoc(long seed, Directory dir, Document doc, boolean useCFS) throws IOException {
+ Random random = new Random(seed);
+ IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
+ conf.setCodec(TestUtil.getDefaultCodec());
+
+ if (useCFS == false) {
+ conf.setUseCompoundFile(false);
+ conf.getMergePolicy().setNoCFSRatio(0.0);
+ } else {
+ conf.setUseCompoundFile(true);
+ conf.getMergePolicy().setNoCFSRatio(1.0);
+ }
+
+ RandomIndexWriter w = new RandomIndexWriter(random, dir, conf);
+ w.addDocument(doc);
+ w.close();
+ }
+
+ private void swapFiles(Directory dir1, Directory dir2) throws IOException {
+ for(String name : dir1.listAll()) {
+ if (name.equals(IndexWriter.WRITE_LOCK_NAME)) {
+ continue;
+ }
+ swapOneFile(dir1, dir2, name);
+ }
+ }
+
+ private void swapOneFile(Directory dir1, Directory dir2, String victim) throws IOException {
+ try (BaseDirectoryWrapper dirCopy = newDirectory()) {
+ dirCopy.setCheckIndexOnClose(false);
+
+ // Copy all files from dir1 to dirCopy, except victim which we copy from dir2:
+ for(String name : dir1.listAll()) {
+ if (name.equals(victim) == false) {
+ dirCopy.copyFrom(dir1, name, name, IOContext.DEFAULT);
+ } else {
+ dirCopy.copyFrom(dir2, name, name, IOContext.DEFAULT);
+ }
+ dirCopy.sync(Collections.singleton(name));
+ }
+
+ try {
+ // NOTE: we .close so that if the test fails (wrong file not detected) we don't also get all these confusing errors about open files:
+ DirectoryReader.open(dirCopy).close();
+ fail("wrong file " + victim + " not detected");
+ } catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
+ // expected
+ }
+
+ // CheckIndex should also fail:
+ try {
+ TestUtil.checkIndex(dirCopy, true, true);
+ fail("wrong file " + victim + " not detected");
+ } catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
+ // expected
+ }
+ }
+ }
+}