Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (working copy)
@@ -177,7 +177,10 @@
}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
+ if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ dirOffset = input.readLong();
+ } else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
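With a checksum footer in place, the 8-byte dirOffset long is no longer the last thing in the file: CodecUtil's footer occupies the final footerLength() bytes, so seekDir has to back up past it before reading the directory pointer. A minimal sketch of the tail layout and arithmetic (names illustrative):

    // tail of a VERSION_CHECKSUM terms file:
    //   ... payload ... | dirOffset (8 bytes) | footer (CodecUtil.footerLength() bytes) | EOF
    long footerStart = input.length() - CodecUtil.footerLength();
    input.seek(footerStart - 8);        // the directory pointer sits just before the footer
    long dirOffset = input.readLong();  // absolute offset of the field directory
    input.seek(dirOffset);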
@@ -863,4 +866,14 @@
sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0;
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {
+ // verify terms
+ if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
+ // verify postings
+ postingsReader.validate();
+ }
}
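validate() is the lazy complement to the footer: instead of paying for a full-file scan on every open, BlockTermsReader defers checksum verification until a caller such as CheckIndex asks for it, then delegates to the postings reader so the whole chain is covered. A hedged sketch of how calling code might drive it:

    // sketch: how a CheckIndex-style caller might exercise validate()
    static void checkTerms(FieldsProducer producer) throws IOException {
      // re-reads the whole .tib file and verifies its stored checksum,
      // then recursively validates the underlying postings files;
      // a CorruptIndexException (an IOException) signals a damaged segment
      producer.validate();
    }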
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (working copy)
@@ -63,12 +63,13 @@
public static final int VERSION_START = 0;
public static final int VERSION_APPEND_ONLY = 1;
public static final int VERSION_META_ARRAY = 2;
- public static final int VERSION_CURRENT = VERSION_META_ARRAY;
+ public static final int VERSION_CHECKSUM = 3;
+ public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of terms file */
static final String TERMS_EXTENSION = "tib";
- protected final IndexOutput out;
+ protected IndexOutput out;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
FieldInfo currentField;
@@ -176,26 +177,30 @@
}
public void close() throws IOException {
- try {
- final long dirStart = out.getFilePointer();
-
- out.writeVInt(fields.size());
- for(FieldMetaData field : fields) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.numTerms);
- out.writeVLong(field.termsStartPointer);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
+ if (out != null) {
+ try {
+ final long dirStart = out.getFilePointer();
+
+ out.writeVInt(fields.size());
+ for(FieldMetaData field : fields) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.numTerms);
+ out.writeVLong(field.termsStartPointer);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ out.writeVLong(field.sumTotalTermFreq);
+ }
+ out.writeVLong(field.sumDocFreq);
+ out.writeVInt(field.docCount);
+ if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+ out.writeVInt(field.longsSize);
+ }
}
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
- if (VERSION_CURRENT >= VERSION_META_ARRAY) {
- out.writeVInt(field.longsSize);
- }
+ writeTrailer(dirStart);
+ CodecUtil.writeFooter(out);
+ } finally {
+ IOUtils.close(out, postingsWriter, termsIndexWriter);
+ out = null;
}
- writeTrailer(dirStart);
- } finally {
- IOUtils.close(out, postingsWriter, termsIndexWriter);
}
}
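The new out != null guard plus out = null in the finally block makes close() idempotent: Lucene may close a consumer a second time while unwinding from an exception, and that second call must not write the trailer twice or double-close the stream. The idiom in isolation (writeEpilogue is a hypothetical stand-in for the directory/trailer/footer writes):

    private IndexOutput out; // no longer final, so it can be cleared on close

    @Override
    public void close() throws IOException {
      if (out == null) {
        return; // already closed, e.g. during exception cleanup
      }
      try {
        writeEpilogue(out); // hypothetical: directory, trailer, CodecUtil.writeFooter
      } finally {
        IOUtils.close(out, postingsWriter, termsIndexWriter);
        out = null; // any further close() becomes a no-op
      }
    }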
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (working copy)
@@ -66,6 +66,8 @@
// start of the field info data
private long dirOffset;
+ private int version;
+
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
throws IOException {
@@ -78,6 +80,11 @@
try {
readHeader(in);
+
+ if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
+
indexInterval = in.readVInt();
if (indexInterval < 1) {
throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
@@ -124,7 +131,7 @@
}
private void readHeader(IndexInput input) throws IOException {
- CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+ version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT);
}
@@ -273,7 +280,11 @@
public void close() throws IOException {}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- input.seek(input.length() - 8);
+ if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ } else {
+ input.seek(input.length() - 8);
+ }
dirOffset = input.readLong();
input.seek(dirOffset);
}
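Unlike BlockTermsReader, FixedGapTermsIndexReader loads the whole index into RAM in its constructor and then closes the file, so there is nothing left to verify lazily; it checksums eagerly, right after the header and before parsing anything. The eager pattern, sketched:

    IndexInput in = dir.openInput(indexFileName, context); // name illustrative
    int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
    if (version >= VERSION_CHECKSUM) {
      // one extra pass over the file, cheap relative to loading it all anyway
      CodecUtil.checksumEntireFile(in);
    }
    // ... parse the index data, now known to be intact ...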
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (working copy)
@@ -26,7 +26,6 @@
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
@@ -43,7 +42,7 @@
*
* @lucene.experimental */
public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
- protected final IndexOutput out;
+ protected IndexOutput out;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tii";
@@ -52,7 +51,8 @@
final static int VERSION_START = 0;
final static int VERSION_APPEND_ONLY = 1;
final static int VERSION_MONOTONIC_ADDRESSING = 2;
- final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING;
+ final static int VERSION_CHECKSUM = 3;
+ final static int VERSION_CURRENT = VERSION_CHECKSUM;
final static int BLOCKSIZE = 4096;
final private int termIndexInterval;
@@ -207,39 +207,43 @@
@Override
public void close() throws IOException {
- boolean success = false;
- try {
- final long dirStart = out.getFilePointer();
- final int fieldCount = fields.size();
-
- int nonNullFieldCount = 0;
- for(int i=0;i<fieldCount;i++) {
- SimpleFieldWriter field = fields.get(i);
- if (field.numIndexTerms > 0) {
- nonNullFieldCount++;
+ if (out != null) {
+ boolean success = false;
+ try {
+ final long dirStart = out.getFilePointer();
+ final int fieldCount = fields.size();
+
+ int nonNullFieldCount = 0;
+ for(int i=0;i<fieldCount;i++) {
+ SimpleFieldWriter field = fields.get(i);
+ if (field.numIndexTerms > 0) {
+ nonNullFieldCount++;
+ }
}
- }
-
- out.writeVInt(nonNullFieldCount);
- for(int i=0;i<fieldCount;i++) {
- SimpleFieldWriter field = fields.get(i);
- if (field.numIndexTerms > 0) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVInt(field.numIndexTerms);
- out.writeVLong(field.termsStart);
- out.writeVLong(field.indexStart);
- out.writeVLong(field.packedIndexStart);
- out.writeVLong(field.packedOffsetsStart);
+
+ out.writeVInt(nonNullFieldCount);
+ for(int i=0;i<fieldCount;i++) {
+ SimpleFieldWriter field = fields.get(i);
+ if (field.numIndexTerms > 0) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVInt(field.numIndexTerms);
+ out.writeVLong(field.termsStart);
+ out.writeVLong(field.indexStart);
+ out.writeVLong(field.packedIndexStart);
+ out.writeVLong(field.packedOffsetsStart);
+ }
}
+ writeTrailer(dirStart);
+ CodecUtil.writeFooter(out);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(out);
+ } else {
+ IOUtils.closeWhileHandlingException(out);
+ }
+ out = null;
}
- writeTrailer(dirStart);
- success = true;
- } finally {
- if (success) {
- IOUtils.close(out);
- } else {
- IOUtils.closeWhileHandlingException(out);
- }
}
}
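This writer uses the success-flag variant of the close idiom: on the happy path IOUtils.close propagates any flush error, while on the failure path closeWhileHandlingException suppresses secondary close exceptions so the original cause is the one that surfaces. In isolation:

    boolean success = false;
    try {
      // ... write the field directory, the trailer, then CodecUtil.writeFooter(out) ...
      success = true;
    } finally {
      if (success) {
        IOUtils.close(out);                       // let close/flush errors propagate
      } else {
        IOUtils.closeWhileHandlingException(out); // don't mask the real exception
      }
      out = null;
    }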
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (working copy)
@@ -62,6 +62,10 @@
try {
version = readHeader(in);
+
+ if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
seekDir(in, dirOffset);
@@ -190,7 +194,10 @@
public void close() throws IOException {}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+ if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ dirOffset = input.readLong();
+ } else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (working copy)
@@ -45,7 +45,7 @@
*
* @lucene.experimental */
public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
- protected final IndexOutput out;
+ protected IndexOutput out;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tiv";
@@ -53,7 +53,8 @@
final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX";
final static int VERSION_START = 0;
final static int VERSION_APPEND_ONLY = 1;
- final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
+ final static int VERSION_CHECKSUM = 2;
+ final static int VERSION_CURRENT = VERSION_CHECKSUM;
private final List<FSTFieldWriter> fields = new ArrayList<>();
@@ -290,31 +291,35 @@
@Override
public void close() throws IOException {
- try {
- final long dirStart = out.getFilePointer();
- final int fieldCount = fields.size();
-
- int nonNullFieldCount = 0;
- for(int i=0;i<fieldCount;i++) {
- FSTFieldWriter field = fields.get(i);
- if (field.fst != null) {
- nonNullFieldCount++;
+ if (out != null) {
+ try {
+ final long dirStart = out.getFilePointer();
+ final int fieldCount = fields.size();
+
+ int nonNullFieldCount = 0;
+ for(int i=0;i<fieldCount;i++) {
+ FSTFieldWriter field = fields.get(i);
+ if (field.fst != null) {
+ nonNullFieldCount++;
+ }
+ }
+
+ out.writeVInt(nonNullFieldCount);
+ for(int i=0;i<fieldCount;i++) {
+ FSTFieldWriter field = fields.get(i);
+ if (field.fst != null) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.indexStart);
+ }
+ }
+ writeTrailer(dirStart);
+ CodecUtil.writeFooter(out);
+ } finally {
+ out.close();
+ out = null;
}
}
-
- out.writeVInt(nonNullFieldCount);
- for(int i=0;i<fieldCount;i++) {
- FSTFieldWriter field = fields.get(i);
- if (field.fst != null) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.indexStart);
- }
- }
- writeTrailer(dirStart);
- } finally {
- out.close();
}
- }
private void writeTrailer(long dirStart) throws IOException {
out.writeLong(dirStart);
Index: lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (working copy)
@@ -39,8 +39,8 @@
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -66,7 +66,7 @@
* </p>
* <ul>
* <li>BloomFilter (.blm) --&gt; Header, DelegatePostingsFormatName,
- * NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
+ * NumFilteredFields, Filter<sup>NumFilteredFields</sup>, Footer</li>
* <li>Filter --&gt; FieldNumber, FuzzySet</li>
* <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput)}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
@@ -75,6 +75,7 @@
* <li>NumFilteredFields --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt Uint32} The number of the
* field in this segment</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* @lucene.experimental
*/
@@ -81,7 +82,9 @@
public final class BloomFilteringPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "BloomFilter";
- public static final int BLOOM_CODEC_VERSION = 1;
+ public static final int VERSION_START = 1;
+ public static final int VERSION_CHECKSUM = 2;
+ public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of Bloom Filters file */
static final String BLOOM_EXTENSION = "blm";
@@ -157,12 +160,11 @@
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
- IndexInput bloomIn = null;
+ ChecksumIndexInput bloomIn = null;
boolean success = false;
try {
- bloomIn = state.directory.openInput(bloomFileName, state.context);
- CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
- BLOOM_CODEC_VERSION);
+ bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
+ int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);
// // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format
@@ -178,6 +180,11 @@
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
bloomsByFieldName.put(fieldInfo.name, bloom);
}
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(bloomIn);
+ } else {
+ CodecUtil.checkEOF(bloomIn);
+ }
IOUtils.close(bloomIn);
success = true;
} finally {
@@ -390,6 +397,11 @@
}
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {
+ delegateFieldsProducer.validate();
+ }
}
class BloomFilteredFieldsConsumer extends FieldsConsumer {
@@ -466,10 +478,8 @@
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexOutput bloomOutput = null;
try {
- bloomOutput = state.directory
- .createOutput(bloomFileName, state.context);
- CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
- BLOOM_CODEC_VERSION);
+ bloomOutput = state.directory.createOutput(bloomFileName, state.context);
+ CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT);
// remember the name of the postings format we will delegate to
bloomOutput.writeString(delegatePostingsFormat.getName());
@@ -481,6 +491,7 @@
bloomOutput.writeInt(fieldInfo.number);
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
}
+ CodecUtil.writeFooter(bloomOutput);
} finally {
IOUtils.close(bloomOutput);
}
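The Bloom filter changes show the standard read-side pairing for footers: open with openChecksumInput so a CRC accumulates as bytes are read, then finish with checkFooter for new files (comparing the accumulated value against the one writeFooter stored) or checkEOF for old ones (which can only reject trailing bytes). Condensed, using the names from this patch:

    ChecksumIndexInput bloomIn = dir.openChecksumInput(bloomFileName, context);
    int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME,
                                        VERSION_START, VERSION_CURRENT);
    // ... read the delegate name and per-field FuzzySets; the CRC updates as we go ...
    if (version >= VERSION_CHECKSUM) {
      CodecUtil.checkFooter(bloomIn); // verifies magic + checksum from writeFooter
    } else {
      CodecUtil.checkEOF(bloomIn);    // pre-checksum files: no trailing garbage
    }
    IOUtils.close(bloomIn);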
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (working copy)
@@ -40,7 +40,7 @@
*/
class DirectDocValuesConsumer extends DocValuesConsumer {
- final IndexOutput data, meta;
+ IndexOutput data, meta;
final int maxDoc;
DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
@@ -142,7 +142,11 @@
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
}
+ if (data != null) {
+ CodecUtil.writeFooter(data);
+ }
success = true;
} finally {
if (success) {
@@ -150,6 +154,7 @@
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
+ data = meta = null;
}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (working copy)
@@ -33,6 +33,7 @@
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -65,6 +66,7 @@
private final int maxDoc;
private final AtomicLong ramBytesUsed;
+ private final int version;
static final byte NUMBER = 0;
static final byte BYTES = 1;
@@ -72,16 +74,16 @@
static final byte SORTED_SET = 3;
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_CHECKSUM = 1;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
- IndexInput in = state.directory.openInput(metaName, state.context);
+ ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
boolean success = false;
- final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
@@ -88,6 +90,11 @@
VERSION_CURRENT);
readFields(in);
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(in);
+ } else {
+ CodecUtil.checkEOF(in);
+ }
success = true;
} finally {
if (success) {
@@ -186,6 +193,13 @@
}
@Override
+ public void validate() throws IOException {
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(data);
+ }
+ }
+
+ @Override
public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericDocValues instance = numericInstances.get(field.number);
if (instance == null) {
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (working copy)
@@ -109,6 +109,7 @@
if (state.context.context != IOContext.Context.MERGE) {
FieldsProducer loadedPostings;
try {
+ postings.validate();
loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff);
} finally {
postings.close();
@@ -157,6 +158,12 @@
}
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {
+ // if we read entirely into ram, we already validated.
+ // otherwise we returned the raw postings reader
+ }
}
private final static class DirectField extends Terms {
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (working copy)
@@ -38,6 +38,7 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -56,7 +57,6 @@
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader;
/**
* FST-based terms dictionary reader.
@@ -63,7 +63,7 @@
*
* The FST index maps each term to its ord, and during seek
* the ord is used to fetch metadata from a single block.
- * The term dictionary is fully memeory resident.
+ * The term dictionary is fully memory resident.
*
* @lucene.experimental
*/
@@ -71,8 +71,7 @@
static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
- IndexInput indexIn = null;
- IndexInput blockIn = null;
+ int version;
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
@@ -80,11 +79,18 @@
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
this.postingsReader = postingsReader;
+ ChecksumIndexInput indexIn = null;
+ IndexInput blockIn = null;
+ boolean success = false;
try {
- this.indexIn = state.directory.openInput(termsIndexFileName, state.context);
- this.blockIn = state.directory.openInput(termsBlockFileName, state.context);
- readHeader(indexIn);
+ indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context);
+ blockIn = state.directory.openInput(termsBlockFileName, state.context);
+ version = readHeader(indexIn);
readHeader(blockIn);
+ if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(blockIn);
+ }
+
this.postingsReader.init(blockIn);
seekDir(blockIn);
@@ -100,12 +106,22 @@
int longsSize = blockIn.readVInt();
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
- TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
+ TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
TermsReader previous = fields.put(fieldInfo.name, current);
- checkFieldSummary(state.segmentInfo, current, previous);
+ checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous);
}
+ if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(indexIn);
+ } else {
+ CodecUtil.checkEOF(indexIn);
+ }
+ success = true;
} finally {
- IOUtils.closeWhileHandlingException(indexIn, blockIn);
+ if (success) {
+ IOUtils.close(indexIn, blockIn);
+ } else {
+ IOUtils.closeWhileHandlingException(indexIn, blockIn);
+ }
}
}
@@ -115,10 +131,14 @@
FSTOrdTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
- in.seek(in.length() - 8);
+ if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+ in.seek(in.length() - CodecUtil.footerLength() - 8);
+ } else {
+ in.seek(in.length() - 8);
+ }
in.seek(in.readLong());
}
- private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
+ private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")");
@@ -176,7 +196,7 @@
final byte[] metaLongsBlock;
final byte[] metaBytesBlock;
- TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
+ TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@@ -819,4 +839,9 @@
}
return ramBytesUsed;
}
+
+ @Override
+ public void validate() throws IOException {
+ postingsReader.validate();
+ }
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (working copy)
@@ -73,9 +73,10 @@
* </p>
*
* <ul>
- * <li>TermIndex(.tix) --&gt; Header, TermFST<sup>NumFields</sup></li>
+ * <li>TermIndex(.tix) --&gt; Header, TermFST<sup>NumFields</sup>, Footer</li>
* <li>TermFST --&gt; {@link FST FST&lt;long&gt;}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
*
* <p>Notes:</p>
@@ -103,7 +104,7 @@
* <ul>
* <li>TermBlock(.tbk) --&gt; Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- * DocCount, LongsSize, DataBlock &gt; <sup>NumFields</sup></li>
+ * DocCount, LongsSize, DataBlock &gt; <sup>NumFields</sup>, Footer</li>
*
* <li>DataBlock --&gt; StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
@@ -119,6 +120,7 @@
* <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
* LongDelta,--&gt; {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes: </p>
* <ul>
@@ -148,7 +150,8 @@
static final String TERMS_BLOCK_EXTENSION = "tbk";
static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
- public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+ public static final int TERMS_VERSION_CHECKSUM = 1;
+ public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
public static final int SKIP_INTERVAL = 8;
final PostingsWriterBase postingsWriter;
@@ -218,36 +221,41 @@
}
public void close() throws IOException {
- IOException ioe = null;
- try {
- final long blockDirStart = blockOut.getFilePointer();
-
- // write field summary
- blockOut.writeVInt(fields.size());
- for (FieldMetaData field : fields) {
- blockOut.writeVInt(field.fieldInfo.number);
- blockOut.writeVLong(field.numTerms);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- blockOut.writeVLong(field.sumTotalTermFreq);
+ if (blockOut != null) {
+ IOException ioe = null;
+ try {
+ final long blockDirStart = blockOut.getFilePointer();
+
+ // write field summary
+ blockOut.writeVInt(fields.size());
+ for (FieldMetaData field : fields) {
+ blockOut.writeVInt(field.fieldInfo.number);
+ blockOut.writeVLong(field.numTerms);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ blockOut.writeVLong(field.sumTotalTermFreq);
+ }
+ blockOut.writeVLong(field.sumDocFreq);
+ blockOut.writeVInt(field.docCount);
+ blockOut.writeVInt(field.longsSize);
+ blockOut.writeVLong(field.statsOut.getFilePointer());
+ blockOut.writeVLong(field.metaLongsOut.getFilePointer());
+ blockOut.writeVLong(field.metaBytesOut.getFilePointer());
+
+ field.skipOut.writeTo(blockOut);
+ field.statsOut.writeTo(blockOut);
+ field.metaLongsOut.writeTo(blockOut);
+ field.metaBytesOut.writeTo(blockOut);
+ field.dict.save(indexOut);
}
- blockOut.writeVLong(field.sumDocFreq);
- blockOut.writeVInt(field.docCount);
- blockOut.writeVInt(field.longsSize);
- blockOut.writeVLong(field.statsOut.getFilePointer());
- blockOut.writeVLong(field.metaLongsOut.getFilePointer());
- blockOut.writeVLong(field.metaBytesOut.getFilePointer());
-
- field.skipOut.writeTo(blockOut);
- field.statsOut.writeTo(blockOut);
- field.metaLongsOut.writeTo(blockOut);
- field.metaBytesOut.writeTo(blockOut);
- field.dict.save(indexOut);
+ writeTrailer(blockOut, blockDirStart);
+ CodecUtil.writeFooter(indexOut);
+ CodecUtil.writeFooter(blockOut);
+ } catch (IOException ioe2) {
+ ioe = ioe2;
+ } finally {
+ IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
+ blockOut = null;
}
- writeTrailer(blockOut, blockDirStart);
- } catch (IOException ioe2) {
- ioe = ioe2;
- } finally {
- IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
}
}
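FSTOrdTermsWriter has two outputs to finish (.tix and .tbk), so it threads any write failure through IOUtils.closeWhileHandlingException(priorException, ...): every resource still gets closed, but the first exception is the one rethrown rather than whatever close() throws afterwards. The idiom, sketched:

    IOException ioe = null;
    try {
      // ... write the field summary to blockOut, save each FST to indexOut ...
      CodecUtil.writeFooter(indexOut); // both files end with a checksum footer
      CodecUtil.writeFooter(blockOut);
    } catch (IOException e) {
      ioe = e;                         // remember the primary failure
    } finally {
      // closes everything; rethrows ioe if non-null instead of masking it
      IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
      blockOut = null;
    }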
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (working copy)
@@ -59,7 +59,7 @@
* FST-based terms dictionary reader.
*
* The FST directly maps each term to its metadata,
- * it is memeory resident.
+ * it is memory resident.
*
* @lucene.experimental
*/
@@ -67,18 +67,21 @@
public class FSTTermsReader extends FieldsProducer {
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
- final IndexInput in;
//static boolean TEST = false;
+ final int version;
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
- this.in = state.directory.openInput(termsFileName, state.context);
+ final IndexInput in = state.directory.openInput(termsFileName, state.context);
boolean success = false;
try {
- readHeader(in);
+ version = readHeader(in);
+ if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
this.postingsReader.init(in);
seekDir(in);
@@ -92,13 +95,15 @@
long sumDocFreq = in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
- TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
+ TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
TermsReader previous = fields.put(fieldInfo.name, current);
- checkFieldSummary(state.segmentInfo, current, previous);
+ checkFieldSummary(state.segmentInfo, in, current, previous);
}
success = true;
} finally {
- if (!success) {
+ if (success) {
+ IOUtils.close(in);
+ } else {
IOUtils.closeWhileHandlingException(in);
}
}
@@ -110,10 +115,14 @@
FSTTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
- in.seek(in.length() - 8);
+ if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
+ in.seek(in.length() - CodecUtil.footerLength() - 8);
+ } else {
+ in.seek(in.length() - 8);
+ }
in.seek(in.readLong());
}
- private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
+ private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
@@ -150,7 +159,7 @@
@Override
public void close() throws IOException {
try {
- IOUtils.close(in, postingsReader);
+ IOUtils.close(postingsReader);
} finally {
fields.clear();
}
@@ -165,7 +174,7 @@
final int longsSize;
final FST<FSTTermOutputs.TermData> dict;
- TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
+ TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@@ -729,4 +738,9 @@
}
return ramBytesUsed;
}
+
+ @Override
+ public void validate() throws IOException {
+ postingsReader.validate();
+ }
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (working copy)
@@ -124,11 +124,12 @@
static final String TERMS_EXTENSION = "tmp";
static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
- public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+ public static final int TERMS_VERSION_CHECKSUM = 1;
+ public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
- final IndexOutput out;
+ IndexOutput out;
final int maxDoc;
final List<FieldMetaData> fields = new ArrayList<>();
@@ -199,28 +200,32 @@
}
public void close() throws IOException {
- IOException ioe = null;
- try {
- // write field summary
- final long dirStart = out.getFilePointer();
-
- out.writeVInt(fields.size());
- for (FieldMetaData field : fields) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.numTerms);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
+ if (out != null) {
+ IOException ioe = null;
+ try {
+ // write field summary
+ final long dirStart = out.getFilePointer();
+
+ out.writeVInt(fields.size());
+ for (FieldMetaData field : fields) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.numTerms);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ out.writeVLong(field.sumTotalTermFreq);
+ }
+ out.writeVLong(field.sumDocFreq);
+ out.writeVInt(field.docCount);
+ out.writeVInt(field.longsSize);
+ field.dict.save(out);
}
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
- out.writeVInt(field.longsSize);
- field.dict.save(out);
+ writeTrailer(out, dirStart);
+ CodecUtil.writeFooter(out);
+ } catch (IOException ioe2) {
+ ioe = ioe2;
+ } finally {
+ IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
+ out = null;
}
- writeTrailer(out, dirStart);
- } catch (IOException ioe2) {
- ioe = ioe2;
- } finally {
- IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java (working copy)
@@ -59,7 +59,7 @@
* Writer for {@link MemoryDocValuesFormat}
*/
class MemoryDocValuesConsumer extends DocValuesConsumer {
- final IndexOutput data, meta;
+ IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
@@ -208,7 +208,11 @@
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
}
+ if (data != null) {
+ CodecUtil.writeFooter(data);
+ }
success = true;
} finally {
if (success) {
@@ -216,6 +220,7 @@
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
+ data = meta = null;
}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (working copy)
@@ -37,6 +37,7 @@
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -77,6 +78,7 @@
private final int maxDoc;
private final AtomicLong ramBytesUsed;
+ private final int version;
static final byte NUMBER = 0;
static final byte BYTES = 1;
@@ -91,15 +93,15 @@
static final int VERSION_START = 0;
static final int VERSION_GCD_COMPRESSION = 1;
- static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
+ static final int VERSION_CHECKSUM = 2;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
- IndexInput in = state.directory.openInput(metaName, state.context);
+ ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
boolean success = false;
- final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
@@ -108,6 +110,11 @@
binaries = new HashMap<>();
fsts = new HashMap<>();
readFields(in, state.fieldInfos);
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(in);
+ } else {
+ CodecUtil.checkEOF(in);
+ }
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
success = true;
} finally {
@@ -208,6 +215,13 @@
return ramBytesUsed.get();
}
+ @Override
+ public void validate() throws IOException {
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(data);
+ }
+ }
+
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
data.seek(entry.offset + entry.missingBytes);
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (working copy)
@@ -25,6 +25,7 @@
import java.util.SortedMap;
import java.util.TreeMap;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
@@ -41,6 +42,7 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -271,6 +273,9 @@
}
private static String EXTENSION = "ram";
+ private static final String CODEC_NAME = "MemoryPostings";
+ private static final int VERSION_START = 0;
+ private static final int VERSION_CURRENT = VERSION_START;
private class MemoryFieldsConsumer extends FieldsConsumer implements Closeable {
private final SegmentWriteState state;
@@ -279,6 +284,7 @@
private MemoryFieldsConsumer(SegmentWriteState state) throws IOException {
final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
out = state.directory.createOutput(fileName, state.context);
+ CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
this.state = state;
}
@@ -403,6 +409,7 @@
// EOF marker:
try {
out.writeVInt(0);
+ CodecUtil.writeFooter(out);
} finally {
out.close();
}
@@ -951,7 +958,8 @@
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
- final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE);
+ final ChecksumIndexInput in = state.directory.openChecksumInput(fileName, IOContext.READONCE);
+ CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
final SortedMap<String,TermsReader> fields = new TreeMap<>();
@@ -965,6 +973,7 @@
// System.out.println("load field=" + termsReader.field.name);
fields.put(termsReader.field.name, termsReader);
}
+ CodecUtil.checkFooter(in);
} finally {
in.close();
}
@@ -1002,6 +1011,9 @@
}
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {}
};
}
}
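MemoryPostingsFormat's .ram file previously carried neither header nor footer, so this patch retrofits both at once; that is why VERSION_START is still VERSION_CURRENT, and any file with a valid header is by construction checksummed. The resulting symmetric write/read pair, sketched:

    // write side
    IndexOutput out = dir.createOutput(fileName, context);
    CodecUtil.writeHeader(out, "MemoryPostings", VERSION_CURRENT);
    // ... one FST per field, then a 0 VInt as the EOF marker ...
    CodecUtil.writeFooter(out);
    out.close();

    // read side
    ChecksumIndexInput in = dir.openChecksumInput(fileName, IOContext.READONCE);
    CodecUtil.checkHeader(in, "MemoryPostings", VERSION_START, VERSION_CURRENT);
    // ... read a TermsReader per field until the 0 VInt marker ...
    CodecUtil.checkFooter(in);
    in.close();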
Index: lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (working copy)
@@ -653,4 +653,9 @@
public long ramBytesUsed() {
return ((wrappedPostingsReader!=null) ? wrappedPostingsReader.ramBytesUsed(): 0);
}
+
+ @Override
+ public void validate() throws IOException {
+ wrappedPostingsReader.validate();
+ }
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (working copy)
@@ -706,4 +706,9 @@
public long ramBytesUsed() {
return 0;
}
+
+ @Override
+ public void validate() throws IOException {
+ // TODO: remove sep layout, it's fallen behind on features...
+ }
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java (working copy)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
@@ -47,6 +48,8 @@
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -467,4 +470,19 @@
public long ramBytesUsed() {
return 0;
}
+
+ @Override
+ public void validate() throws IOException {
+ BytesRef scratch = new BytesRef();
+ IndexInput clone = data.clone();
+ clone.seek(0);
+ ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
+ while(true) {
+ SimpleTextUtil.readLine(input, scratch);
+ if (scratch.equals(END)) {
+ SimpleTextUtil.checkFooter(input, CHECKSUM);
+ break;
+ }
+ }
+ }
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java (working copy)
@@ -36,6 +36,7 @@
import org.apache.lucene.util.IOUtils;
class SimpleTextDocValuesWriter extends DocValuesConsumer {
+ final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TYPE = new BytesRef(" type ");
@@ -49,7 +50,7 @@
final static BytesRef NUMVALUES = new BytesRef(" numvalues ");
final static BytesRef ORDPATTERN = new BytesRef(" ordpattern ");
- final IndexOutput data;
+ IndexOutput data;
final BytesRef scratch = new BytesRef();
final int numDocs;
private final Set<String> fieldsSeen = new HashSet<>(); // for asserting
@@ -389,18 +390,25 @@
@Override
public void close() throws IOException {
- boolean success = false;
- try {
- assert !fieldsSeen.isEmpty();
- // TODO: sheisty to do this here?
- SimpleTextUtil.write(data, END);
- SimpleTextUtil.writeNewline(data);
- success = true;
- } finally {
- if (success) {
- IOUtils.close(data);
- } else {
- IOUtils.closeWhileHandlingException(data);
+ if (data != null) {
+ boolean success = false;
+ try {
+ assert !fieldsSeen.isEmpty();
+ // TODO: sheisty to do this here?
+ SimpleTextUtil.write(data, END);
+ SimpleTextUtil.writeNewline(data);
+ String checksum = Long.toString(data.getChecksum());
+ SimpleTextUtil.write(data, CHECKSUM);
+ SimpleTextUtil.write(data, checksum, scratch);
+ SimpleTextUtil.writeNewline(data);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(data);
+ } else {
+ IOUtils.closeWhileHandlingException(data);
+ }
+ data = null;
}
}
}
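SimpleText stays human-readable, so instead of a binary footer it appends a literal "checksum NNN" line after END, using the running CRC that IndexOutput now exposes through getChecksum(). Sketch of the write side, with the tail the file ends up with (value illustrative):

    SimpleTextUtil.write(data, END);                      // "END"
    SimpleTextUtil.writeNewline(data);
    String checksum = Long.toString(data.getChecksum());  // CRC of all bytes so far
    SimpleTextUtil.write(data, CHECKSUM);                 // "checksum "
    SimpleTextUtil.write(data, checksum, scratch);
    SimpleTextUtil.writeNewline(data);
    // resulting file tail:
    //   END
    //   checksum 1234567890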
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (working copy)
@@ -23,15 +23,14 @@
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
@@ -49,7 +48,7 @@
@Override
public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
- IndexInput input = directory.openInput(fileName, iocontext);
+ ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
BytesRef scratch = new BytesRef();
boolean success = false;
@@ -129,9 +128,7 @@
infos[i].setDocValuesGen(dvGen);
}
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
- }
+ SimpleTextUtil.checkFooter(input, CHECKSUM);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (working copy)
@@ -58,6 +58,7 @@
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef ATT_KEY = new BytesRef(" key ");
final static BytesRef ATT_VALUE = new BytesRef(" value ");
+ final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override
public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
@@ -132,6 +133,10 @@
}
}
}
+ String checksum = Long.toString(out.getChecksum());
+ SimpleTextUtil.write(out, CHECKSUM);
+ SimpleTextUtil.write(out, checksum, scratch);
+ SimpleTextUtil.writeNewline(out);
success = true;
} finally {
if (success) {
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (working copy)
@@ -33,6 +33,8 @@
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@@ -50,6 +52,17 @@
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
+
class SimpleTextFieldsReader extends FieldsProducer {
private final TreeMap<String,Long> fields;
private final IndexInput in;
@@ -56,16 +69,6 @@
private final FieldInfos fieldInfos;
private final int maxDoc;
- final static BytesRef END = SimpleTextFieldsWriter.END;
- final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
- final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
- final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
- final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
- final static BytesRef POS = SimpleTextFieldsWriter.POS;
- final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET;
- final static BytesRef END_OFFSET = SimpleTextFieldsWriter.END_OFFSET;
- final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
-
public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
this.maxDoc = state.segmentInfo.getDocCount();
fieldInfos = state.fieldInfos;
@@ -82,16 +85,18 @@
}
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
+ ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
BytesRef scratch = new BytesRef(10);
TreeMap<String,Long> fields = new TreeMap<>();
while (true) {
- SimpleTextUtil.readLine(in, scratch);
+ SimpleTextUtil.readLine(input, scratch);
if (scratch.equals(END)) {
+ SimpleTextUtil.checkFooter(input, CHECKSUM);
return fields;
} else if (StringHelper.startsWith(scratch, FIELD)) {
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
- fields.put(fieldName, in.getFilePointer());
+ fields.put(fieldName, input.getFilePointer());
}
}
}
@@ -668,4 +673,7 @@
}
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (working copy)
@@ -35,10 +35,11 @@
class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable {
- private final IndexOutput out;
+ private IndexOutput out;
private final BytesRef scratch = new BytesRef(10);
private final SegmentWriteState writeState;
+ final static BytesRef CHECKSUM = new BytesRef("checksum ");
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef TERM = new BytesRef(" term ");
@@ -215,11 +216,18 @@
@Override
public void close() throws IOException {
- try {
- write(END);
- newline();
- } finally {
- out.close();
+ if (out != null) {
+ try {
+ write(END);
+ newline();
+ String checksum = Long.toString(out.getChecksum());
+ write(CHECKSUM);
+ write(checksum);
+ newline();
+ } finally {
+ out.close();
+ out = null;
+ }
}
}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java (working copy)
@@ -24,9 +24,9 @@
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@@ -50,6 +50,7 @@
final static BytesRef SIZE = new BytesRef("size ");
final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef END = new BytesRef("END");
+ final static BytesRef CHECKSUM = new BytesRef("checksum ");
@Override
public MutableBits newLiveDocs(int size) throws IOException {
@@ -69,10 +70,10 @@
CharsRef scratchUTF16 = new CharsRef();
String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
- IndexInput in = null;
+ ChecksumIndexInput in = null;
boolean success = false;
try {
- in = dir.openInput(fileName, context);
+ in = dir.openChecksumInput(fileName, context);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, SIZE);
@@ -88,6 +89,8 @@
SimpleTextUtil.readLine(in, scratch);
}
+ SimpleTextUtil.checkFooter(in, CHECKSUM);
+
success = true;
return new SimpleTextBits(bits, size);
} finally {
@@ -127,6 +130,10 @@
SimpleTextUtil.write(out, END);
SimpleTextUtil.writeNewline(out);
+ String checksum = Long.toString(out.getChecksum());
+ SimpleTextUtil.write(out, CHECKSUM);
+ SimpleTextUtil.write(out, checksum, scratch);
+ SimpleTextUtil.writeNewline(out);
success = true;
} finally {
if (success) {
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (working copy)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
@@ -35,9 +36,9 @@
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
@@ -54,7 +55,7 @@
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
BytesRef scratch = new BytesRef();
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
- IndexInput input = directory.openInput(segFileName, context);
+ ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
@@ -96,6 +97,8 @@
String fileName = readString(SI_FILE.length, scratch);
files.add(fileName);
}
+
+ SimpleTextUtil.checkFooter(input, SI_CHECKSUM);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics);
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (working copy)
@@ -47,6 +47,7 @@
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
+ final static BytesRef SI_CHECKSUM = new BytesRef(" checksum ");
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
@@ -55,7 +56,7 @@
si.addFile(segFileName);
boolean success = false;
- IndexOutput output = dir.createOutput(segFileName, ioContext);
+ IndexOutput output = dir.createOutput(segFileName, ioContext);
try {
BytesRef scratch = new BytesRef();
@@ -103,6 +104,11 @@
SimpleTextUtil.writeNewline(output);
}
}
+
+ String checksum = Long.toString(output.getChecksum());
+ SimpleTextUtil.write(output, SI_CHECKSUM);
+ SimpleTextUtil.write(output, checksum, scratch);
+ SimpleTextUtil.writeNewline(output);
success = true;
} finally {
if (!success) {
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (working copy)
@@ -26,6 +26,8 @@
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -78,15 +80,17 @@
// stored fields file in entirety up-front and save the offsets
// so we can seek to the documents later.
private void readIndex(int size) throws IOException {
+ ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
offsets = new long[size];
int upto = 0;
while (!scratch.equals(END)) {
- readLine();
+ SimpleTextUtil.readLine(input, scratch);
if (StringHelper.startsWith(scratch, DOC)) {
- offsets[upto] = in.getFilePointer();
+ offsets[upto] = input.getFilePointer();
upto++;
}
}
+ SimpleTextUtil.checkFooter(input, CHECKSUM);
assert upto == offsets.length;
}
@@ -188,6 +192,11 @@
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
}
+ private String readString(int offset, BytesRef scratch) {
+ UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
+ return scratchUTF16.toString();
+ }
+
private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
return a.length == b.length - bOffset &&
ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
@@ -197,4 +206,7 @@
public long ramBytesUsed() {
return 0;
}
+
+ @Override
+ public void validate() throws IOException {}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java (working copy)
@@ -51,13 +51,14 @@
final static BytesRef TYPE_FLOAT = new BytesRef("float");
final static BytesRef TYPE_DOUBLE = new BytesRef("double");
- final static BytesRef END = new BytesRef("END");
- final static BytesRef DOC = new BytesRef("doc ");
- final static BytesRef NUM = new BytesRef(" numfields ");
- final static BytesRef FIELD = new BytesRef(" field ");
- final static BytesRef NAME = new BytesRef(" name ");
- final static BytesRef TYPE = new BytesRef(" type ");
- final static BytesRef VALUE = new BytesRef(" value ");
+ final static BytesRef CHECKSUM = new BytesRef("checksum ");
+ final static BytesRef END = new BytesRef("END");
+ final static BytesRef DOC = new BytesRef("doc ");
+ final static BytesRef NUM = new BytesRef(" numfields ");
+ final static BytesRef FIELD = new BytesRef(" field ");
+ final static BytesRef NAME = new BytesRef(" name ");
+ final static BytesRef TYPE = new BytesRef(" type ");
+ final static BytesRef VALUE = new BytesRef(" value ");
private final BytesRef scratch = new BytesRef();
@@ -171,6 +172,10 @@
}
write(END);
newLine();
+ String checksum = Long.toString(out.getChecksum());
+ write(CHECKSUM);
+ write(checksum);
+ newLine();
}
@Override
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (working copy)
@@ -33,6 +33,8 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -82,15 +84,17 @@
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
private void readIndex(int maxDoc) throws IOException {
+ ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
offsets = new long[maxDoc];
int upto = 0;
while (!scratch.equals(END)) {
- readLine();
+ SimpleTextUtil.readLine(input, scratch);
if (StringHelper.startsWith(scratch, DOC)) {
- offsets[upto] = in.getFilePointer();
+ offsets[upto] = input.getFilePointer();
upto++;
}
}
+ SimpleTextUtil.checkFooter(input, CHECKSUM);
assert upto == offsets.length;
}
@@ -537,4 +541,7 @@
public long ramBytesUsed() {
return 0;
}
+
+ @Override
+ public void validate() throws IOException {}
}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (working copy)
@@ -37,6 +37,7 @@
*/
public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
+ static final BytesRef CHECKSUM = new BytesRef("checksum ");
static final BytesRef END = new BytesRef("END");
static final BytesRef DOC = new BytesRef("doc ");
static final BytesRef NUMFIELDS = new BytesRef(" numfields ");
@@ -177,6 +178,10 @@
}
write(END);
newLine();
+ String checksum = Long.toString(out.getChecksum());
+ write(CHECKSUM);
+ write(checksum);
+ newLine();
}
@Override
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java (revision 1583220)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java (working copy)
@@ -17,11 +17,16 @@
* limitations under the License.
*/
+import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM;
+
import java.io.IOException;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
class SimpleTextUtil {
@@ -67,4 +72,18 @@
scratch.offset = 0;
scratch.length = upto;
}
+
+ public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException {
+ BytesRef scratch = new BytesRef();
+ String expectedChecksum = Long.toString(input.getChecksum());
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch, prefix);
+ String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString();
+ if (!expectedChecksum.equals(actualChecksum)) {
+ throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")");
+ }
+ if (input.length() != input.getFilePointer()) {
+ throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
+ }
+ }
}
Index: lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (working copy)
@@ -131,6 +131,11 @@
if (indexVersion != version) {
throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
}
+
+ // verify
+ if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(indexIn);
+ }
// Have PostingsReader init itself
postingsReader.init(in);
@@ -157,7 +162,7 @@
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
- final int longsSize = version >= BlockTreeTermsWriter.TERMS_VERSION_META_ARRAY ? in.readVInt() : 0;
+ final int longsSize = version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@@ -187,9 +192,9 @@
/** Reads terms file header. */
private int readHeader(IndexInput input) throws IOException {
int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
- BlockTreeTermsWriter.TERMS_VERSION_START,
- BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
- if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) {
+ BlockTreeTermsWriter.VERSION_START,
+ BlockTreeTermsWriter.VERSION_CURRENT);
+ if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
dirOffset = input.readLong();
}
return version;
@@ -198,9 +203,9 @@
/** Reads index file header. */
private int readIndexHeader(IndexInput input) throws IOException {
int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
- BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
- BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
- if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+ BlockTreeTermsWriter.VERSION_START,
+ BlockTreeTermsWriter.VERSION_CURRENT);
+ if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
indexDirOffset = input.readLong();
}
return version;
@@ -209,7 +214,10 @@
/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
- if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+ if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ dirOffset = input.readLong();
+ } else if (version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
@@ -2977,4 +2985,15 @@
}
return sizeInByes;
}
+
+ @Override
+ public void validate() throws IOException {
+ if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+ // term dictionary
+ CodecUtil.checksumEntireFile(in);
+
+ // postings
+ postingsReader.validate();
+ }
+ }
}
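The new seekDir() arithmetic follows directly from the on-disk layout after this patch; as a comment sketch:

    // Tail of a VERSION_CHECKSUM .tim/.tip file:
    //   ... FieldSummary ... | DirOffset: 8 bytes | Footer: 16 bytes (magic 4, algID 4, CRC 8)
    // so the directory pointer now lives at input.length() - CodecUtil.footerLength() - 8,
    // while older append-only files keep it at input.length() - 8.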
Index: lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (working copy)
@@ -109,7 +109,7 @@
*
* <ul>
* <li>TermsDict (.tim) --&gt; Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
- * FieldSummary, DirOffset</li>
+ * FieldSummary, DirOffset, Footer</li>
* <li>NodeBlock --&gt; (OuterNode | InnerNode)</li>
* <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata</i>&gt;<sup>EntryCount</sup></li>
* <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata ? </i>&gt;<sup>EntryCount</sup></li>
@@ -122,6 +122,7 @@
* FieldNumber,RootCodeLength,DocCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --&gt;
* {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@@ -150,12 +151,13 @@
* when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
* <ul>
* <li>TermsIndex (.tip) --&gt; Header, FSTIndex<sup>NumFields</sup>
- * &lt;IndexStartFP&gt;<sup>NumFields</sup>, DirOffset</li>
+ * &lt;IndexStartFP&gt;<sup>NumFields</sup>, DirOffset, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>IndexStartFP --&gt; {@link DataOutput#writeVLong VLong}</li>
* <!-- TODO: better describe FST output here -->
* <li>FSTIndex --&gt; {@link FST FST&lt;byte[]&gt;}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@@ -178,7 +180,6 @@
* @see BlockTreeTermsReader
* @lucene.experimental
*/
-
public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable {
/** Suggested default value for the {@code
@@ -204,33 +205,24 @@
final static String TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT";
/** Initial terms format. */
- public static final int TERMS_VERSION_START = 0;
+ public static final int VERSION_START = 0;
/** Append-only */
- public static final int TERMS_VERSION_APPEND_ONLY = 1;
+ public static final int VERSION_APPEND_ONLY = 1;
/** Meta data as array */
- public static final int TERMS_VERSION_META_ARRAY = 2;
+ public static final int VERSION_META_ARRAY = 2;
+
+ /** checksums */
+ public static final int VERSION_CHECKSUM = 3;
/** Current terms format. */
- public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_META_ARRAY;
+ public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
final static String TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX";
- /** Initial index format. */
- public static final int TERMS_INDEX_VERSION_START = 0;
-
- /** Append-only */
- public static final int TERMS_INDEX_VERSION_APPEND_ONLY = 1;
-
- /** Meta data as array */
- public static final int TERMS_INDEX_VERSION_META_ARRAY = 2;
-
- /** Current index format. */
- public static final int TERMS_INDEX_VERSION_CURRENT = TERMS_INDEX_VERSION_META_ARRAY;
-
private final IndexOutput out;
private final IndexOutput indexOut;
final int maxDoc;
@@ -326,12 +318,12 @@
/** Writes the terms file header. */
private void writeHeader(IndexOutput out) throws IOException {
- CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
+ CodecUtil.writeHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT);
}
/** Writes the index file header. */
private void writeIndexHeader(IndexOutput out) throws IOException {
- CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT);
+ CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT);
}
/** Writes the terms file trailer. */
@@ -1139,13 +1131,13 @@
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
- if (TERMS_VERSION_CURRENT >= TERMS_VERSION_META_ARRAY) {
- out.writeVInt(field.longsSize);
- }
+ out.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
}
writeTrailer(out, dirStart);
+ CodecUtil.writeFooter(out);
writeIndexTrailer(indexOut, indexDirStart);
+ CodecUtil.writeFooter(indexOut);
} catch (IOException ioe2) {
ioe = ioe2;
} finally {
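Two things happen in this writer hunk: the duplicated TERMS_/TERMS_INDEX_ version constants collapse into one shared set, and the close path gains footers. Ordering in the close path matters; restated as a sketch:

    writeTrailer(out, dirStart);   // 1. record DirOffset at the current tail
    CodecUtil.writeFooter(out);    // 2. footer last, so its CRC covers all prior bytes

The same trailer-then-footer sequence is applied to both the .tim and .tip outputs.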
Index: lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (working copy)
@@ -23,8 +23,12 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
/**
@@ -43,6 +47,10 @@
* Constant to identify the start of a codec header.
*/
public final static int CODEC_MAGIC = 0x3fd76c17;
+ /**
+ * Constant to identify the start of a codec footer.
+ */
+ public final static int FOOTER_MAGIC = ~CODEC_MAGIC;
/**
* Writes a codec header, which records both a string to
@@ -150,4 +158,119 @@
return actualVersion;
}
+
+ /**
+ * Writes a codec footer, which records both a checksum
+ * algorithm ID and a checksum. This footer can
+ * be parsed and validated with
+ * {@link #checkFooter(ChecksumIndexInput) checkFooter()}.
+ * <p>
+ * CodecFooter --&gt; Magic,AlgorithmID,Checksum
+ * <ul>
+ * <li>Magic --&gt; {@link DataOutput#writeInt Uint32}. This
+ * identifies the start of the footer. It is always {@value #FOOTER_MAGIC}.
+ * <li>AlgorithmID --&gt; {@link DataOutput#writeInt Uint32}. This
+ * indicates the checksum algorithm used. Currently this is always 0,
+ * for zlib-crc32.
+ * <li>Checksum --&gt; {@link DataOutput#writeLong Uint64}. The
+ * actual checksum value for all previous bytes in the stream, including
+ * the bytes from Magic and AlgorithmID.
+ * </ul>
+ *
+ * @param out Output stream
+ * @throws IOException If there is an I/O error writing to the underlying medium.
+ */
+ public static void writeFooter(IndexOutput out) throws IOException {
+ out.writeInt(FOOTER_MAGIC);
+ out.writeInt(0);
+ out.writeLong(out.getChecksum());
+ }
+
+ /**
+ * Computes the length of a codec footer.
+ *
+ * @return length of the entire codec footer.
+ * @see #writeFooter(IndexOutput)
+ */
+ public static int footerLength() {
+ return 16;
+ }
+
+ /**
+ * Validates the codec footer previously written by {@link #writeFooter}.
+ * @return actual checksum value
+ * @throws IOException if the footer is invalid, if the checksum does not match,
+ * or if {@code in} is not properly positioned before the footer
+ * at the end of the stream.
+ */
+ public static long checkFooter(ChecksumIndexInput in) throws IOException {
+ validateFooter(in);
+ long actualChecksum = in.getChecksum();
+ long expectedChecksum = in.readLong();
+ if (expectedChecksum != actualChecksum) {
+ throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(expectedChecksum) +
+ " actual=" + Long.toHexString(actualChecksum) +
+ " (resource=" + in + ")");
+ }
+ if (in.getFilePointer() != in.length()) {
+ throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+ }
+ return actualChecksum;
+ }
+
+ /**
+ * Returns (but does not validate) the checksum previously written by {@link #writeFooter}.
+ * @return actual checksum value
+ * @throws IOException if the footer is invalid
+ */
+ public static long retrieveChecksum(IndexInput in) throws IOException {
+ in.seek(in.length() - footerLength());
+ validateFooter(in);
+ return in.readLong();
+ }
+
+ private static void validateFooter(IndexInput in) throws IOException {
+ final int magic = in.readInt();
+ if (magic != FOOTER_MAGIC) {
+ throw new CorruptIndexException("codec footer mismatch: actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC + " (resource: " + in + ")");
+ }
+
+ final int algorithmID = in.readInt();
+ if (algorithmID != 0) {
+ throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID);
+ }
+ }
+
+ /**
+ * Checks that the stream is positioned at the end, and throws exception
+ * if it is not.
+ * @deprecated Use {@link #checkFooter} instead; this should only be used for files without checksums
+ */
+ @Deprecated
+ public static void checkEOF(IndexInput in) throws IOException {
+ if (in.getFilePointer() != in.length()) {
+ throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+ }
+ }
+
+ /**
+ * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}.
+ * <p>
+ * Note that this method may be slow, as it must process the entire file.
+ * If you just need to extract the checksum value, call {@link #retrieveChecksum}.
+ */
+ public static long checksumEntireFile(IndexInput input) throws IOException {
+ IndexInput clone = input.clone();
+ clone.seek(0);
+ ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
+ assert in.getFilePointer() == 0;
+ final byte[] buffer = new byte[1024];
+ long bytesToRead = in.length() - footerLength();
+ for (long skipped = 0; skipped < bytesToRead; ) {
+ final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length);
+ in.readBytes(buffer, 0, toRead);
+ skipped += toRead;
+ }
+ return checkFooter(in);
+ }
}
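A minimal, self-contained round trip over the new footer API (illustrative; the file name and payload are assumptions, not part of the patch):

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.store.RAMDirectory;

    public class FooterRoundTrip {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // Write a demo payload, then the 16-byte footer:
        // magic (4), algorithm ID 0 = zlib-crc32 (4), CRC of all prior bytes (8).
        IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT);
        out.writeString("some payload");
        CodecUtil.writeFooter(out);
        out.close();
        // Read the body through a checksumming input, then verify the footer.
        ChecksumIndexInput in = dir.openChecksumInput("demo.bin", IOContext.DEFAULT);
        in.readString();
        long crc = CodecUtil.checkFooter(in);  // throws CorruptIndexException on mismatch
        in.close();
        dir.close();
      }
    }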
Index: lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java (working copy)
@@ -68,6 +68,14 @@
public abstract long ramBytesUsed();
/**
+ * Validates/runs consistency checks on this producer.
+ * <p>
+ * Note that this may be costly in terms of I/O, e.g.
+ * may involve computing a checksum value against large data files.
+ */
+ public abstract void validate() throws IOException;
+
+ /**
* A simple implementation of {@link DocValuesProducer#getDocsWithField} that
* returns {@code true} if a document has an ordinal &gt;= 0
* <p>
Index: lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java (working copy)
@@ -39,4 +39,12 @@
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
+
+ /**
+ * Validates/runs consistency checks on this reader.
+ * <p>
+ * Note that this may be costly in terms of I/O, e.g.
+ * may involve computing a checksum value against large data files.
+ */
+ public abstract void validate() throws IOException;
}
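The patch shows both extremes of this contract: checksummed formats re-read and verify their files, while pre-checksum formats satisfy it with a no-op. A sketch of the two shapes (field names illustrative, mirroring the readers patched in this change):

    // Checksummed format: version-gated full-file verification (may be slow).
    @Override
    public void validate() throws IOException {
      if (version >= VERSION_CHECKSUM) {
        CodecUtil.checksumEntireFile(in);
      }
    }

    // Pre-checksum format (e.g. the Lucene40 readers below): nothing to verify.
    @Override
    public void validate() throws IOException {}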
Index: lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (working copy)
@@ -72,6 +72,14 @@
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
+ /**
+ * Validates/runs consistency checks on this reader.
+ * <p>
+ * Note that this may be costly in terms of I/O, e.g.
+ * may involve computing a checksum value against large data files.
+ */
+ public abstract void validate() throws IOException;
+
@Override
public abstract void close() throws IOException;
}
Index: lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (working copy)
@@ -43,4 +43,12 @@
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
+
+ /**
+ * Validates/runs consistency checks on this reader.
+ * <p>
+ * Note that this may be costly in terms of I/O, e.g.
+ * may involve computing a checksum value against large data files.
+ */
+ public abstract void validate() throws IOException;
}
Index: lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java (working copy)
@@ -45,6 +45,14 @@
/** Returns approximate RAM bytes used */
public abstract long ramBytesUsed();
+ /**
+ * Validates/runs consistency checks on this reader.
+ * <p>
+ * Note that this may be costly in terms of I/O, e.g.
+ * may involve computing a checksum value against large data files.
+ */
+ public abstract void validate() throws IOException;
+
/** Create a clone that one caller at a time may use to
* read term vectors. */
@Override
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java (working copy)
@@ -21,6 +21,7 @@
import java.io.IOException;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.PackedInts;
@@ -52,6 +53,7 @@
* <li>AvgChunkSize --&gt; the average size of a chunk of compressed documents, as a {@link DataOutput#writeVLong VLong}</li>
* <li>BitsPerStartPointerDelta --&gt; number of bits required to represent a delta from the average using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li>
* <li>StartPointerDeltas --&gt; {@link PackedInts packed} array of BlockChunks elements of BitsPerStartPointerDelta bits each, representing the deltas from the average start pointer using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes</p>
* <ul>
@@ -198,6 +200,7 @@
writeBlock();
}
fieldsIndexOut.writeVInt(0); // end marker
+ CodecUtil.writeFooter(fieldsIndexOut);
}
@Override
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (working copy)
@@ -28,6 +28,7 @@
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS;
+import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
@@ -47,6 +48,7 @@
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
@@ -113,17 +115,20 @@
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
- IndexInput indexStream = null;
+ ChecksumIndexInput indexStream = null;
try {
// Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
- indexStream = d.openInput(indexStreamFN, context);
+ indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
- if (indexStream.getFilePointer() != indexStream.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")");
+
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(indexStream);
+ } else {
+ CodecUtil.checkEOF(indexStream);
}
indexStream.close();
indexStream = null;
@@ -509,4 +514,11 @@
return indexReader.ramBytesUsed();
}
+ @Override
+ public void validate() throws IOException {
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(fieldsStream);
+ }
+ }
+
}
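The asymmetry above is deliberate: the small .fdx index stream is fully consumed at open time, so it is verified inline (checkFooter for new files, checkEOF for old ones), while the large .fdt data stream is only verified when validate() is invoked:

    // Open time:  small index file is already fully read, verify it inline.
    // validate(): re-read and CRC the big data file only on demand.
    if (version >= VERSION_CHECKSUM) {
      CodecUtil.checksumEntireFile(fieldsStream);  // may be slow on large segments
    }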
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (working copy)
@@ -71,7 +71,8 @@
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
static final int VERSION_BIG_CHUNKS = 1;
- static final int VERSION_CURRENT = VERSION_BIG_CHUNKS;
+ static final int VERSION_CHECKSUM = 2;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
private final Directory directory;
private final String segment;
@@ -106,9 +107,11 @@
this.numBufferedDocs = 0;
boolean success = false;
- IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
+ IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION),
+ context);
try {
- fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
+ fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION),
+ context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
@@ -314,6 +317,7 @@
throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
}
indexWriter.finish(numDocs);
+ CodecUtil.writeFooter(fieldsStream);
assert bufferedDocs.length == 0;
}
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (working copy)
@@ -28,6 +28,7 @@
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
import java.io.Closeable;
import java.io.IOException;
@@ -48,6 +49,7 @@
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -69,6 +71,7 @@
private final FieldInfos fieldInfos;
final CompressingStoredFieldsIndexReader indexReader;
final IndexInput vectorsStream;
+ private final int version;
private final int packedIntsVersion;
private final CompressionMode compressionMode;
private final Decompressor decompressor;
@@ -88,6 +91,7 @@
this.chunkSize = reader.chunkSize;
this.numDocs = reader.numDocs;
this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+ this.version = reader.version;
this.closed = false;
}
@@ -99,17 +103,20 @@
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
- IndexInput indexStream = null;
+ ChecksumIndexInput indexStream = null;
try {
// Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
- indexStream = d.openInput(indexStreamFN, context);
+ indexStream = d.openChecksumInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- int version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
+ version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
- if (indexStream.getFilePointer() != indexStream.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")");
+
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(indexStream);
+ } else {
+ CodecUtil.checkEOF(indexStream);
}
indexStream.close();
indexStream = null;
@@ -1045,5 +1052,12 @@
public long ramBytesUsed() {
return indexReader.ramBytesUsed();
}
+
+ @Override
+ public void validate() throws IOException {
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(vectorsStream);
+ }
+ }
}
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (working copy)
@@ -66,7 +66,8 @@
static final String CODEC_SFX_DAT = "Data";
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_CHECKSUM = 1;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
static final int BLOCK_SIZE = 64;
@@ -220,9 +221,11 @@
lastTerm = new BytesRef(ArrayUtil.oversize(30, 1));
boolean success = false;
- IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
+ IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION),
+ context);
try {
- vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
+ vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION),
+ context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
@@ -659,6 +662,7 @@
throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
}
indexWriter.finish(numDocs);
+ CodecUtil.writeFooter(vectorsStream);
}
@Override
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java (working copy)
@@ -21,6 +21,8 @@
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -198,9 +200,12 @@
// Changed DGaps to encode gaps between cleared bits, not
// set:
public final static int VERSION_DGAPS_CLEARED = 1;
+
+ // added checksum
+ public final static int VERSION_CHECKSUM = 2;
// Increment version to change it:
- public final static int VERSION_CURRENT = VERSION_DGAPS_CLEARED;
+ public final static int VERSION_CURRENT = VERSION_CHECKSUM;
public int getVersion() {
return version;
@@ -221,6 +226,7 @@
} else {
writeBits(output);
}
+ CodecUtil.writeFooter(output);
assert verifyCount();
} finally {
IOUtils.close(output);
@@ -324,7 +330,7 @@
<code>d</code>, as written by the {@link #write} method.
*/
public BitVector(Directory d, String name, IOContext context) throws IOException {
- IndexInput input = d.openInput(name, context);
+ ChecksumIndexInput input = d.openChecksumInput(name, context);
try {
final int firstInt = input.readInt();
@@ -334,8 +340,8 @@
version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_CURRENT);
size = input.readInt();
} else {
- version = VERSION_PRE;
- size = firstInt;
+ // we started writing full header well before 4.0
+ throw new IndexFormatTooOldException(input.toString(), Integer.toString(firstInt));
}
if (size == -1) {
if (version >= VERSION_DGAPS_CLEARED) {
@@ -351,6 +357,11 @@
invertAll();
}
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(input);
+ } else {
+ CodecUtil.checkEOF(input);
+ }
assert verifyCount();
} finally {
input.close();
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java (working copy)
@@ -105,9 +105,7 @@
default:
throw new AssertionError();
}
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
- }
+ CodecUtil.checkEOF(input);
success = true;
} finally {
if (success) {
@@ -327,9 +325,7 @@
PagedBytes bytes = new PagedBytes(16);
bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount());
final PagedBytes.Reader bytesReader = bytes.freeze(true);
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
- }
+ CodecUtil.checkEOF(input);
success = true;
ramBytesUsed.addAndGet(bytes.ramBytesUsed());
return new BinaryDocValues() {
@@ -367,12 +363,8 @@
bytes.copy(data, totalBytes);
final PagedBytes.Reader bytesReader = bytes.freeze(true);
final PackedInts.Reader reader = PackedInts.getReader(index);
- if (data.getFilePointer() != data.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
- }
- if (index.getFilePointer() != index.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
- }
+ CodecUtil.checkEOF(data);
+ CodecUtil.checkEOF(index);
success = true;
ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
return new BinaryDocValues() {
@@ -414,12 +406,8 @@
bytes.copy(data, fixedLength * (long) valueCount);
final PagedBytes.Reader bytesReader = bytes.freeze(true);
final PackedInts.Reader reader = PackedInts.getReader(index);
- if (data.getFilePointer() != data.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
- }
- if (index.getFilePointer() != index.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
- }
+ CodecUtil.checkEOF(data);
+ CodecUtil.checkEOF(index);
ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
success = true;
return new BinaryDocValues() {
@@ -459,12 +447,8 @@
bytes.copy(data, totalBytes);
final PagedBytes.Reader bytesReader = bytes.freeze(true);
final PackedInts.Reader reader = PackedInts.getReader(index);
- if (data.getFilePointer() != data.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
- }
- if (index.getFilePointer() != index.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
- }
+ CodecUtil.checkEOF(data);
+ CodecUtil.checkEOF(index);
ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed());
success = true;
return new BinaryDocValues() {
@@ -515,12 +499,8 @@
default:
throw new AssertionError();
}
- if (data.getFilePointer() != data.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
- }
- if (index.getFilePointer() != index.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
- }
+ CodecUtil.checkEOF(data);
+ CodecUtil.checkEOF(index);
success = true;
} finally {
if (success) {
@@ -654,4 +634,8 @@
public long ramBytesUsed() {
return ramBytesUsed.get();
}
+
+ @Override
+ public void validate() throws IOException {
+ }
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (working copy)
@@ -107,9 +107,7 @@
omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes));
}
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
- }
+ CodecUtil.checkEOF(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (working copy)
@@ -1168,4 +1168,7 @@
return 0;
}
+ @Override
+ public void validate() throws IOException {}
+
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (working copy)
@@ -64,9 +64,7 @@
input.readStringStringMap(); // read deprecated attributes
final Set<String> files = input.readStringSet();
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
- }
+ CodecUtil.checkEOF(input);
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
si.setFiles(files);
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (working copy)
@@ -249,4 +249,7 @@
public long ramBytesUsed() {
return 0;
}
+
+ @Override
+ public void validate() throws IOException {}
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (working copy)
@@ -760,5 +760,8 @@
public long ramBytesUsed() {
return 0;
}
+
+ @Override
+ public void validate() throws IOException {}
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (working copy)
@@ -132,6 +132,7 @@
* <li>Header, --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedBlockSize, SingletonDocID --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --&gt; {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@@ -190,7 +191,7 @@
* each packed or VInt block, when the length of document list is larger than packed block size.</p>
*
* <ul>
- * <li>docFile(.doc) --&gt; Header, &lt;TermFreqs, SkipData?&gt;<sup>TermCount</sup></li>
+ * <li>docFile(.doc) --&gt; Header, &lt;TermFreqs, SkipData?&gt;<sup>TermCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermFreqs --&gt; &lt;PackedBlock&gt; <sup>PackedDocBlockNum</sup>,
* VIntBlock? </li>
@@ -206,6 +207,7 @@
* --&gt;
* {@link DataOutput#writeVInt VInt}</li>
* <li>SkipChildLevelPointer --&gt; {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@@ -273,7 +275,7 @@
* <p>The .pos file contains the lists of positions that each term occurs at within documents. It also
* sometimes stores part of payloads and offsets for speedup.</p>
* <ul>
- * <li>PosFile(.pos) --&gt; Header, &lt;TermPositions&gt; <sup>TermCount</sup></li>
+ * <li>PosFile(.pos) --&gt; Header, &lt;TermPositions&gt; <sup>TermCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermPositions --&gt; &lt;PackedPosDeltaBlock&gt; <sup>PackedPosBlockNum</sup>,
* VIntBlock? </li>
@@ -283,6 +285,7 @@
* <li>PositionDelta, OffsetDelta, OffsetLength --&gt;
* {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadData --&gt; {@link DataOutput#writeByte byte}<sup>PayLength</sup></li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@@ -325,7 +328,7 @@
* <p>The .pay file will store payloads and offsets associated with certain term-document positions.
* Some payloads and offsets will be separated out into .pos file, for performance reasons.</p>
* <ul>
- * <li>PayFile(.pay): --&gt; Header, &lt;TermPayloads, TermOffsets?&gt; <sup>TermCount</sup></li>
+ * <li>PayFile(.pay): --&gt; Header, &lt;TermPayloads, TermOffsets?&gt; <sup>TermCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermPayloads --&gt; &lt;PackedPayLengthBlock, SumPayLength, PayData&gt; <sup>PackedPayBlockNum</sup>
* <li>TermOffsets --&gt; &lt;PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock&gt; <sup>PackedPayBlockNum</sup>
@@ -332,6 +335,7 @@
* <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --&gt; {@link PackedInts PackedInts}</li>
* <li>SumPayLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayData --&gt; {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (working copy)
@@ -35,7 +35,6 @@
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -1547,4 +1546,18 @@
return 0;
}
+ @Override
+ public void validate() throws IOException {
+ if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ if (docIn != null) {
+ CodecUtil.checksumEntireFile(docIn);
+ }
+ if (posIn != null) {
+ CodecUtil.checksumEntireFile(posIn);
+ }
+ if (payIn != null) {
+ CodecUtil.checksumEntireFile(payIn);
+ }
+ }
+ }
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java (working copy)
@@ -64,11 +64,12 @@
// Increment version to change it
final static int VERSION_START = 0;
final static int VERSION_META_ARRAY = 1;
- final static int VERSION_CURRENT = VERSION_META_ARRAY;
+ final static int VERSION_CHECKSUM = 2;
+ final static int VERSION_CURRENT = VERSION_CHECKSUM;
- final IndexOutput docOut;
- final IndexOutput posOut;
- final IndexOutput payOut;
+ IndexOutput docOut;
+ IndexOutput posOut;
+ IndexOutput payOut;
final static IntBlockTermState emptyState = new IntBlockTermState();
IntBlockTermState lastState;
@@ -113,7 +114,7 @@
super();
docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION),
- state.context);
+ state.context);
IndexOutput posOut = null;
IndexOutput payOut = null;
boolean success = false;
@@ -123,7 +124,7 @@
if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new int[MAX_DATA_SIZE];
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
- state.context);
+ state.context);
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
if (state.fieldInfos.hasPayloads()) {
@@ -144,7 +145,7 @@
if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
- state.context);
+ state.context);
CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
}
} else {
@@ -569,6 +570,26 @@
@Override
public void close() throws IOException {
- IOUtils.close(docOut, posOut, payOut);
+ // TODO: add a finish() at least to PushBase? DV too...?
+ boolean success = false;
+ try {
+ if (docOut != null) {
+ CodecUtil.writeFooter(docOut);
+ }
+ if (posOut != null) {
+ CodecUtil.writeFooter(posOut);
+ }
+ if (payOut != null) {
+ CodecUtil.writeFooter(payOut);
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(docOut, posOut, payOut);
+ } else {
+ IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+ }
+ docOut = posOut = payOut = null;
+ }
}
}
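The rewritten close() uses Lucene's standard success-flag idiom; condensed:

    boolean success = false;
    try {
      // final writes that may fail: the three footers
      success = true;
    } finally {
      if (success) {
        IOUtils.close(docOut, posOut, payOut);                        // propagate close errors
      } else {
        IOUtils.closeWhileHandlingException(docOut, posOut, payOut);  // keep the original exception
      }
      docOut = posOut = payOut = null;                                // second close() is a no-op
    }

This is also why the three outputs lose their final modifier earlier in the hunk: they must be nullable after close.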
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (working copy)
@@ -68,7 +68,7 @@
* <p>The DocValues metadata or .dvm file.</p>
* <p>For DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
- * <p>DocValues metadata (.dvm) --&gt; Header,&lt;FieldNumber,EntryType,Entry&gt;<sup>NumFields</sup></p>
+ * <p>DocValues metadata (.dvm) --&gt; Header,&lt;FieldNumber,EntryType,Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry</li>
* <li>NumericEntry --&gt; DataOffset,CompressionType,PackedVersion</li>
@@ -78,6 +78,7 @@
* <li>DataOffset,DataLength --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>EntryType,CompressionType --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
@@ -105,7 +106,7 @@
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
- * <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup></p>
+ * <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
@@ -114,6 +115,7 @@
* <li>TableCompressedNumerics --&gt; TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li>
* <li>UncompressedNumerics --&gt; {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
* sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
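
The Footer element added to the grammar above is verified at open time, but only for files new enough to carry one; the producer hunks below apply exactly this version gate. A consolidated sketch (codec name and version constants are hypothetical):

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;

    class FooterCheckSketch {
      static final String META_CODEC = "ExampleMeta"; // hypothetical
      static final int VERSION_START = 0;             // hypothetical
      static final int VERSION_CHECKSUM = 2;          // hypothetical

      static int openAndVerify(Directory dir, String metaName) throws IOException {
        // openChecksumInput accumulates a CRC32 of every byte read
        try (ChecksumIndexInput in = dir.openChecksumInput(metaName, IOContext.READONCE)) {
          int version = CodecUtil.checkHeader(in, META_CODEC, VERSION_START, VERSION_CHECKSUM);
          // ... read the per-field entries here ...
          if (version >= VERSION_CHECKSUM) {
            CodecUtil.checkFooter(in);  // verifies the footer and its CRC32
          } else {
            CodecUtil.checkEOF(in);     // old files: just require full consumption
          }
          return version;
        }
      }
    }
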
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (working copy)
@@ -37,6 +37,7 @@
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -64,6 +65,7 @@
private final Map<Integer,BinaryEntry> binaries;
private final Map<Integer,FSTEntry> fsts;
private final IndexInput data;
+ private final int version;
// ram instances we have already loaded
private final Map<Integer,NumericDocValues> numericInstances =
@@ -89,16 +91,16 @@
static final int VERSION_START = 0;
static final int VERSION_GCD_COMPRESSION = 1;
- static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
+ static final int VERSION_CHECKSUM = 2;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
- IndexInput in = state.directory.openInput(metaName, state.context);
+ ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
boolean success = false;
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
- final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
@@ -108,8 +110,10 @@
fsts = new HashMap<>();
readFields(in, state.fieldInfos);
- if (in.getFilePointer() != in.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(in);
+ } else {
+ CodecUtil.checkEOF(in);
}
success = true;
@@ -199,6 +203,13 @@
return ramBytesUsed.get();
}
+ @Override
+ public void validate() throws IOException {
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(data);
+ }
+ }
+
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
data.seek(entry.offset);
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java (working copy)
@@ -92,9 +92,7 @@
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
}
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
- }
+ CodecUtil.checkEOF(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java (working copy)
@@ -34,14 +34,12 @@
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
import org.apache.lucene.util.packed.PackedInts;
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT;
+
/**
* Writer for {@link Lucene42NormsFormat}
*/
-class Lucene42NormsConsumer extends DocValuesConsumer {
- static final int VERSION_START = 0;
- static final int VERSION_GCD_COMPRESSION = 1;
- static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
-
+class Lucene42NormsConsumer extends DocValuesConsumer {
static final byte NUMBER = 0;
static final int BLOCK_SIZE = 4096;
@@ -51,7 +49,7 @@
static final byte UNCOMPRESSED = 2;
static final byte GCD_COMPRESSED = 3;
- final IndexOutput data, meta;
+ IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
@@ -181,7 +179,11 @@
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
}
+ if (data != null) {
+ CodecUtil.writeFooter(data); // write checksum
+ }
success = true;
} finally {
if (success) {
@@ -189,6 +191,7 @@
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
+ meta = data = null;
}
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java (working copy)
@@ -59,7 +59,7 @@
* {@link BlockPackedWriter blocks of packed ints} for positions.</p>
* <p>Here is a more detailed description of the field data file format:</p>
* <ul>
- * <li>VectorData (.tvd) --&gt; &lt;Header&gt;, PackedIntsVersion, ChunkSize, &lt;Chunk&gt;<sup>ChunkCount</sup></li>
+ * <li>VectorData (.tvd) --&gt; &lt;Header&gt;, PackedIntsVersion, ChunkSize, &lt;Chunk&gt;<sup>ChunkCount</sup>, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>PackedIntsVersion --&gt; {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
* <li>ChunkSize is the number of bytes of terms to accumulate before flushing, as a {@link DataOutput#writeVInt VInt}</li>
@@ -107,14 +107,16 @@
* <li>FieldTermsAndPayLoads --&gt; Terms (Payloads)</li>
* <li>Terms: term bytes</li>
* <li>Payloads: payload bytes (if the field has payloads)</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </li>
* <li><a name="vector_index" id="vector_index"></a>
* <p>An index file (extension <tt>.tvx</tt>).</p>
* <ul>
- * <li>VectorIndex (.tvx) --&gt; &lt;Header&gt;, &lt;ChunkIndex&gt;</li>
+ * <li>VectorIndex (.tvx) --&gt; &lt;Header&gt;, &lt;ChunkIndex&gt;, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </li>
* </ol>
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java (working copy)
@@ -66,7 +66,7 @@
* of indirection: docId -> ord. */
public static final int SORTED_SET_SINGLE_VALUED_SORTED = 1;
- final IndexOutput data, meta;
+ IndexOutput data, meta;
final int maxDoc;
/** expert: Creates a new writer */
@@ -438,7 +438,11 @@
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
}
+ if (data != null) {
+ CodecUtil.writeFooter(data); // write checksum
+ }
success = true;
} finally {
if (success) {
@@ -446,6 +450,7 @@
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
+ meta = data = null;
}
}
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java (working copy)
@@ -89,7 +89,7 @@
* <p>The DocValues metadata or .dvm file.</p>
* <p>For each DocValues field, this stores metadata, such as the offset into the
* DocValues data (.dvd)</p>
- * <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup></p>
+ * <p>DocValues metadata (.dvm) --&gt; Header,&lt;Entry&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>Entry --&gt; NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li>
* <li>NumericEntry --&gt; GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
@@ -109,6 +109,7 @@
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --&gt; {@link DataOutput#writeLong Int64}</li>
* <li>TableSize --&gt; {@link DataOutput#writeVInt vInt}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
@@ -138,10 +139,13 @@
* is written for the addresses.
* <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
* If it's -1, then there are no missing values.
+ * <p>Checksum contains the CRC32 checksum of all bytes in the .dvm file up
+ * until the checksum. This is used to verify integrity of the file on opening the
+ * index.
* <li><a name="dvd" id="dvd"></a>
* <p>The DocValues data or .dvd file.</p>
* <p>For each DocValues field, this stores the actual per-document data (the heavy lifting)
- * <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup></p>
+ * <p>DocValues data (.dvd) --&gt; Header,&lt;NumericData | BinaryData | SortedData&gt;<sup>NumFields</sup>,Footer</p>
* <ul>
* <li>NumericData --&gt; DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --&gt; {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
@@ -150,6 +154,7 @@
* <li>TableCompressedNumerics --&gt; {@link PackedInts PackedInts}</li>
* <li>GCDCompressedNumerics --&gt; {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
* <li>Addresses --&gt; {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
* sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
@@ -179,7 +184,8 @@
static final String META_EXTENSION = "dvm";
static final int VERSION_START = 0;
static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1;
- static final int VERSION_CURRENT = VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED;
+ static final int VERSION_CHECKSUM = 2;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
static final byte NUMERIC = 0;
static final byte BINARY = 1;
static final byte SORTED = 2;
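
On the write side the Footer costs one call after the last entry: CodecUtil.writeFooter stamps a footer whose final 8 bytes are the CRC32 of every byte written before them. A minimal sketch of a complete file lifecycle (file name, codec name, and version are hypothetical):

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.IOUtils;

    class FooterWriteSketch {
      static void writeExample(Directory dir) throws IOException {
        IndexOutput out = dir.createOutput("_0.example", IOContext.DEFAULT); // hypothetical file
        boolean success = false;
        try {
          CodecUtil.writeHeader(out, "ExampleMeta", 2); // hypothetical codec name / version
          out.writeVInt(-1);                            // body; here just an EOF marker
          CodecUtil.writeFooter(out);                   // last 8 bytes = CRC32 of everything above
          success = true;
        } finally {
          if (success) {
            out.close();
          } else {
            IOUtils.closeWhileHandlingException(out);
          }
        }
      }
    }
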
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (working copy)
@@ -50,6 +50,7 @@
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -80,7 +81,7 @@
protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
- IndexInput in = state.directory.openInput(metaName, state.context);
+ ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
this.maxDoc = state.segmentInfo.getDocCount();
boolean success = false;
try {
@@ -94,8 +95,10 @@
sortedSets = new HashMap<>();
readFields(in, state.fieldInfos);
- if (in.getFilePointer() != in.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")");
+ if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(in);
+ } else {
+ CodecUtil.checkEOF(in);
}
success = true;
@@ -299,6 +302,13 @@
return ramBytesUsed.get();
}
+ @Override
+ public void validate() throws IOException {
+ if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(data);
+ }
+ }
+
LongValues getNumeric(NumericEntry entry) throws IOException {
final IndexInput data = this.data.clone();
data.seek(entry.offset);
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java (working copy)
@@ -32,7 +32,7 @@
* <p>
* <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
* <p>FieldInfos (.fnm) --&gt; Header,FieldsCount, &lt;FieldName,FieldNumber,
- * FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup></p>
+ * FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup>,Footer</p>
* <p>Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#checkHeader CodecHeader}</li>
@@ -42,6 +42,7 @@
* <li>FieldNumber --&gt; {@link DataOutput#writeInt VInt}</li>
* <li>Attributes --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>DocValuesGen --&gt; {@link DataOutput#writeLong(long) Int64}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
@@ -113,7 +114,8 @@
// Codec header
static final String CODEC_NAME = "Lucene46FieldInfos";
static final int FORMAT_START = 0;
- static final int FORMAT_CURRENT = FORMAT_START;
+ static final int FORMAT_CHECKSUM = 1;
+ static final int FORMAT_CURRENT = FORMAT_CHECKSUM;
// Field flags
static final byte IS_INDEXED = 0x1;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java (working copy)
@@ -29,6 +29,7 @@
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -49,13 +50,13 @@
@Override
public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
- IndexInput input = directory.openInput(fileName, context);
+ ChecksumIndexInput input = directory.openChecksumInput(fileName, context);
boolean success = false;
try {
- CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
- Lucene46FieldInfosFormat.FORMAT_START,
- Lucene46FieldInfosFormat.FORMAT_CURRENT);
+ int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
+ Lucene46FieldInfosFormat.FORMAT_START,
+ Lucene46FieldInfosFormat.FORMAT_CURRENT);
final int size = input.readVInt(); //read in the size
FieldInfo infos[] = new FieldInfo[size];
@@ -91,9 +92,11 @@
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
infos[i].setDocValuesGen(dvGen);
}
-
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+
+ if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
+ CodecUtil.checkFooter(input);
+ } else {
+ CodecUtil.checkEOF(input);
}
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java (working copy)
@@ -26,9 +26,9 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
@@ -81,6 +81,7 @@
output.writeLong(fi.getDocValuesGen());
output.writeStringStringMap(fi.attributes());
}
+ CodecUtil.writeFooter(output);
success = true;
} finally {
if (success) {
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (working copy)
@@ -31,7 +31,7 @@
* <p>
* Files:
* <ul>
- * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
+ * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
* </ul>
* </p>
* Data types:
@@ -43,6 +43,7 @@
* <li>Files --&gt; {@link DataOutput#writeStringSet Set&lt;String&gt;}</li>
* <li>Diagnostics --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>IsCompoundFile --&gt; {@link DataOutput#writeByte Int8}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
@@ -53,9 +54,6 @@
* <li>IsCompoundFile records whether the segment is written as a compound file or
* not. If this is -1, the segment is not a compound file. If it is 1, the segment
* is a compound file.</li>
- * <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
- * until the checksum. This is used to verify integrity of the file on opening the
- * index.</li>
* <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid,
* for each segment it creates. It includes metadata like the current Lucene
* version, OS, Java version, why the segment was created (merge, flush,
@@ -89,5 +87,6 @@
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene46SegmentInfo";
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_CHECKSUM = 1;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (working copy)
@@ -26,9 +26,9 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
/**
@@ -46,12 +46,12 @@
@Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
- final IndexInput input = dir.openInput(fileName, context);
+ final ChecksumIndexInput input = dir.openChecksumInput(fileName, context);
boolean success = false;
try {
- CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
- Lucene46SegmentInfoFormat.VERSION_START,
- Lucene46SegmentInfoFormat.VERSION_CURRENT);
+ int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
+ Lucene46SegmentInfoFormat.VERSION_START,
+ Lucene46SegmentInfoFormat.VERSION_CURRENT);
final String version = input.readString();
final int docCount = input.readInt();
if (docCount < 0) {
@@ -61,8 +61,10 @@
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
- if (input.getFilePointer() != input.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+ if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(input);
+ } else {
+ CodecUtil.checkEOF(input);
}
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (working copy)
@@ -59,7 +59,7 @@
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringSet(si.files());
-
+ CodecUtil.writeFooter(output);
success = true;
} finally {
if (!success) {
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html (working copy)
@@ -383,6 +383,9 @@
<li>In version 4.5, DocValues were extended to explicitly represent missing values.</li>
<li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
allow updating NumericDocValues fields.</li>
+<li>In version 4.8, checksum footers were added to the end of each index file
+for improved data integrity. Specifically, the last 8 bytes of every index file
+contain the zlib-crc32 checksum of the file.</li>
</ul>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
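
That last claim is easy to spot-check with nothing but the JDK: strip the trailing 8 bytes, CRC32 the rest, and compare against the stored value (read big-endian, matching DataOutput's long encoding). A sketch only; real verification should go through CodecUtil, which also validates the footer structure:

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.zip.CRC32;

    public class TrailingCrcCheck {
      public static void main(String[] args) throws IOException {
        byte[] file = Files.readAllBytes(Paths.get(args[0])); // e.g. a .si or .dvm file
        int body = file.length - 8;            // everything except the trailing checksum
        CRC32 crc = new CRC32();
        crc.update(file, 0, body);
        // the stored checksum is written as a big-endian long
        long stored = ByteBuffer.wrap(file, body, 8).getLong();
        System.out.println(crc.getValue() == stored ? "checksum OK" : "checksum MISMATCH");
      }
    }
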
Index: lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (working copy)
@@ -310,6 +310,13 @@
}
return size;
}
+
+ @Override
+ public void validate() throws IOException {
+ for (DocValuesProducer format : formats.values()) {
+ format.validate();
+ }
+ }
}
@Override
Index: lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (working copy)
@@ -246,6 +246,13 @@
}
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {
+ for (FieldsProducer producer : formats.values()) {
+ producer.validate();
+ }
+ }
}
@Override
Index: lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (working copy)
@@ -238,4 +238,12 @@
* synchronization.
*/
public abstract Bits getLiveDocs();
+
+ /**
+ * Validates/runs consistency checks on this reader.
+ * <p>
+ * Note that this may be costly in terms of I/O; e.g. it
+ * may involve computing a checksum value against large data files.
+ */
+ public abstract void validate() throws IOException;
}
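
validate() is per-leaf; composite readers expose it through their leaves (SlowCompositeReaderWrapper below does the same internally). A usage sketch against a hypothetical on-disk index:

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ValidateLeaves {
      public static void main(String[] args) throws IOException {
        try (Directory dir = FSDirectory.open(new File(args[0]));
             DirectoryReader reader = DirectoryReader.open(dir)) {
          for (AtomicReaderContext ctx : reader.leaves()) {
            ctx.reader().validate(); // throws CorruptIndexException on a checksum mismatch
          }
          System.out.println("validated " + reader.leaves().size() + " segment(s)");
        }
      }
    }
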
Index: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy)
@@ -536,6 +536,10 @@
reader = new SegmentReader(info, IOContext.DEFAULT);
segInfoStat.openReaderPassed = true;
+
+ if (infoStream != null)
+ infoStream.print(" test: validate reader.........");
+ reader.validate();
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
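
With the hook above, every CheckIndex run now exercises validate() before the per-segment content tests. Programmatic usage of the existing CheckIndex API (index location hypothetical):

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.index.CheckIndex;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class RunCheckIndex {
      public static void main(String[] args) throws IOException {
        try (Directory dir = FSDirectory.open(new File(args[0]))) {
          CheckIndex checker = new CheckIndex(dir);
          checker.setInfoStream(System.out);               // prints the per-segment test lines
          CheckIndex.Status status = checker.checkIndex(); // read-only; does not modify the index
          System.out.println(status.clean ? "index is clean" : "index has problems");
        }
      }
    }
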
Index: lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (working copy)
@@ -423,4 +423,9 @@
return in.getDocsWithField(field);
}
+ @Override
+ public void validate() throws IOException {
+ ensureOpen();
+ in.validate();
+ }
}
Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -2657,7 +2657,8 @@
false, codec, null);
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
- MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
+ MergeState.CheckAbort.NONE, globalFieldNumberMap,
+ context, config.getValidateAtMerge());
if (!merger.shouldMerge()) {
return;
@@ -4057,7 +4058,8 @@
// OneMerge to return a view over the actual segments to merge
final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
merge.info.info, infoStream, dirWrapper,
- checkAbort, globalFieldNumberMap, context);
+ checkAbort, globalFieldNumberMap,
+ context, config.getValidateAtMerge());
merge.checkAborted(directory);
Index: lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy)
@@ -110,6 +110,12 @@
* (set to <code>true</code>). For batch indexing with very large
* ram buffers use <code>false</code> */
public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true;
+
+ /** Default value for calling {@link AtomicReader#validate()} before
+ * merging segments (set to <code>false</code>). You can set this
+ * to <code>true</code> for additional safety. */
+ public final static boolean DEFAULT_VALIDATE_AT_MERGE = false;
+
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).
Index: lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java (working copy)
@@ -97,6 +97,9 @@
/** True if segment flushes should use compound file format */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
+
+ /** True if merging should validate segments first */
+ protected volatile boolean validateAtMerge = IndexWriterConfig.DEFAULT_VALIDATE_AT_MERGE;
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
@@ -152,6 +155,7 @@
flushPolicy = config.getFlushPolicy();
perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
useCompoundFile = config.getUseCompoundFile();
+ validateAtMerge = config.getValidateAtMerge();
}
/** Returns the default analyzer to use for indexing documents. */
@@ -475,6 +479,26 @@
return useCompoundFile ;
}
+ /**
+ * Sets whether {@link IndexWriter} should call {@link AtomicReader#validate()}
+ * on existing segments before merging them into a new one.
+ * <p>
+ * Use <code>true</code> to enable this safety check, which can help
+ * reduce the risk of propagating index corruption from older segments
+ * into new ones, at the expense of slower merging.
+ * </p>
+ */
+ public LiveIndexWriterConfig setValidateAtMerge(boolean validateAtMerge) {
+ this.validateAtMerge = validateAtMerge;
+ return this;
+ }
+
+ /** Returns true if {@link AtomicReader#validate()} is called before
+ * merging segments. */
+ public boolean getValidateAtMerge() {
+ return validateAtMerge;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -499,6 +523,7 @@
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
+ sb.append("validateAtMerge=").append(getValidateAtMerge()).append("\n");
return sb.toString();
}
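
Turning the new check on is a single config call. A sketch, assuming the 4.8 version constant and a standard analyzer purely for illustration:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class ValidateAtMergeExample {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_48,
            new StandardAnalyzer(Version.LUCENE_48));
        conf.setValidateAtMerge(true); // checksum-verify segments before merging them
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
          // ... add documents; merges will now validate their input segments first ...
        }
      }
    }
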
Index: lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (working copy)
@@ -299,4 +299,12 @@
NumericDocValues values = reader == null ? null : reader.getNormValues(field);
return values;
}
+
+ @Override
+ public void validate() throws IOException {
+ ensureOpen();
+ for (AtomicReader reader : completeReaderSet) {
+ reader.validate();
+ }
+ }
}
Index: lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (working copy)
@@ -95,14 +95,4 @@
IOUtils.reThrow(t);
}
}
-
- /** Returns approximate RAM bytes used. */
- synchronized long ramBytesUsed() {
- long ramBytesUsed = 0;
- for (RefCount<DocValuesProducer> dvp : genDVProducers.values()) {
- ramBytesUsed += dvp.get().ramBytesUsed();
- }
- return ramBytesUsed;
- }
-
}
Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (working copy)
@@ -36,11 +36,9 @@
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.ChecksumIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.IOUtils;
@@ -69,10 +67,10 @@
* <p>
* Files:
* <ul>
- * <li><tt>segments.gen</tt>: GenHeader, Generation, Generation
+ * <li><tt>segments.gen</tt>: GenHeader, Generation, Generation, Footer
* <li><tt>segments_N</tt>: Header, Version, NameCounter, SegCount,
* &lt;SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles&gt;<sup>SegCount</sup>,
- * CommitUserData, Checksum
+ * CommitUserData, Footer
* </ul>
* </p>
* Data types:
@@ -84,6 +82,7 @@
* <li>SegName, SegCodec --&gt; {@link DataOutput#writeString String}</li>
* <li>CommitUserData --&gt; {@link DataOutput#writeStringStringMap Map&lt;String,String&gt;}</li>
* <li>UpdatesFiles --&gt; {@link DataOutput#writeStringSet(Set) Set&lt;String&gt;}</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* </p>
* Field Descriptions:
@@ -98,9 +97,6 @@
* there are no deletes. Anything above zero means there are deletes
* stored by {@link LiveDocsFormat}.</li>
* <li>DeletionCount records the number of deleted documents in this segment.</li>
- * <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up
- * until the checksum. This is used to verify integrity of the file on opening the
- * index.</li>
* <li>SegCodec is the {@link Codec#getName() name} of the Codec that encoded
* this segment.</li>
* <li>CommitUserData stores an optional user-supplied opaque
@@ -122,10 +118,17 @@
/** The file format version for the segments_N codec header, since 4.6+. */
public static final int VERSION_46 = 1;
+
+ /** The file format version for the segments_N codec header, since 4.8+ */
+ public static final int VERSION_48 = 2;
- /** Used for the segments.gen file only!
- * Whenever you add a new format, make it 1 smaller (negative version logic)! */
- public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
+ // Used for the segments.gen file only!
+ // Whenever you add a new format, make it 1 smaller (negative version logic)!
+ private static final int FORMAT_SEGMENTS_GEN_47 = -2;
+ private static final int FORMAT_SEGMENTS_GEN_CHECKSUM = -3;
+ private static final int FORMAT_SEGMENTS_GEN_START = FORMAT_SEGMENTS_GEN_47;
+ /** Current format of segments.gen */
+ public static final int FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM;
/** Used to name new segments. */
public int counter;
@@ -266,6 +269,7 @@
genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT);
genOutput.writeLong(generation);
genOutput.writeLong(generation);
+ CodecUtil.writeFooter(genOutput);
} finally {
genOutput.close();
dir.sync(Collections.singleton(IndexFileNames.SEGMENTS_GEN));
@@ -317,7 +321,7 @@
lastGeneration = generation;
- ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
+ ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ);
try {
// NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
// to read the magic ourselves.
@@ -326,7 +330,7 @@
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
}
// 4.0+
- int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_46);
+ int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_48);
version = input.readLong();
counter = input.readInt();
int numSegments = input.readInt();
@@ -366,10 +370,15 @@
}
userData = input.readStringStringMap();
- final long checksumNow = input.getChecksum();
- final long checksumThen = input.readLong();
- if (checksumNow != checksumThen) {
- throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
+ if (format >= VERSION_48) {
+ CodecUtil.checkFooter(input);
+ } else {
+ final long checksumNow = input.getChecksum();
+ final long checksumThen = input.readLong();
+ if (checksumNow != checksumThen) {
+ throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
+ }
+ CodecUtil.checkEOF(input);
}
success = true;
@@ -402,7 +411,7 @@
// Only non-null after prepareCommit has been called and
// before finishCommit is called
- ChecksumIndexOutput pendingSegnOutput;
+ IndexOutput pendingSegnOutput;
private void write(Directory directory) throws IOException {
@@ -415,12 +424,12 @@
generation++;
}
- ChecksumIndexOutput segnOutput = null;
+ IndexOutput segnOutput = null;
boolean success = false;
try {
- segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName, IOContext.DEFAULT));
- CodecUtil.writeHeader(segnOutput, "segments", VERSION_46);
+ segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
+ CodecUtil.writeHeader(segnOutput, "segments", VERSION_48);
segnOutput.writeLong(version);
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
@@ -641,9 +650,9 @@
// a stale cache (NFS) we have a better chance of
// getting the right generation.
long genB = -1;
- IndexInput genInput = null;
+ ChecksumIndexInput genInput = null;
try {
- genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
+ genInput = directory.openChecksumInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
} catch (IOException e) {
if (infoStream != null) {
message("segments.gen open: IOException " + e);
@@ -653,18 +662,23 @@
if (genInput != null) {
try {
int version = genInput.readInt();
- if (version == FORMAT_SEGMENTS_GEN_CURRENT) {
+ if (version == FORMAT_SEGMENTS_GEN_47 || version == FORMAT_SEGMENTS_GEN_CHECKSUM) {
long gen0 = genInput.readLong();
long gen1 = genInput.readLong();
if (infoStream != null) {
message("fallback check: " + gen0 + "; " + gen1);
}
+ if (version == FORMAT_SEGMENTS_GEN_CHECKSUM) {
+ CodecUtil.checkFooter(genInput);
+ } else {
+ CodecUtil.checkEOF(genInput);
+ }
if (gen0 == gen1) {
// The file is consistent.
genB = gen0;
}
} else {
- throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT);
+ throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_START, FORMAT_SEGMENTS_GEN_CURRENT);
}
} catch (IOException err2) {
// rethrow any format exception
@@ -863,7 +877,7 @@
}
boolean success = false;
try {
- pendingSegnOutput.finishCommit();
+ CodecUtil.writeFooter(pendingSegnOutput);
success = true;
} finally {
if (!success) {
Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy)
@@ -52,7 +52,13 @@
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
- MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
+ MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException {
+ // validate incoming readers
+ if (validate) {
+ for (AtomicReader reader : readers) {
+ reader.validate();
+ }
+ }
mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
directory = dir;
this.codec = segmentInfo.getCodec();
Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -33,10 +33,13 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
/**
* IndexReader implementation over a single segment.
@@ -72,7 +75,8 @@
}
};
- final Map<String,DocValuesProducer> dvProducers = new HashMap<>();
+ final Map<String,DocValuesProducer> dvProducersByField = new HashMap<>();
+ final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
final FieldInfos fieldInfos;
@@ -177,12 +181,15 @@
// System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gens=" + genInfos.keySet());
+ // TODO: can we avoid iterating over fieldinfos several times and creating maps of all this stuff if dv updates do not exist?
+
for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
Long gen = e.getKey();
List<FieldInfo> infos = e.getValue();
DocValuesProducer dvp = segDocValues.getDocValuesProducer(gen, si, IOContext.READ, dir, dvFormat, infos);
for (FieldInfo fi : infos) {
- dvProducers.put(fi.name, dvp);
+ dvProducersByField.put(fi.name, dvp);
+ dvProducers.add(dvp);
}
}
@@ -250,7 +257,7 @@
try {
core.decRef();
} finally {
- dvProducers.clear();
+ dvProducersByField.clear();
try {
IOUtils.close(docValuesLocal, docsWithFieldLocal);
} finally {
@@ -395,13 +402,12 @@
return null;
}
- DocValuesProducer dvProducer = dvProducers.get(field);
- assert dvProducer != null;
-
Map<String,Object> dvFields = docValuesLocal.get();
NumericDocValues dvs = (NumericDocValues) dvFields.get(field);
if (dvs == null) {
+ DocValuesProducer dvProducer = dvProducersByField.get(field);
+ assert dvProducer != null;
dvs = dvProducer.getNumeric(fi);
dvFields.put(field, dvs);
}
@@ -422,13 +428,12 @@
return null;
}
- DocValuesProducer dvProducer = dvProducers.get(field);
- assert dvProducer != null;
-
Map<String,Bits> dvFields = docsWithFieldLocal.get();
Bits dvs = dvFields.get(field);
if (dvs == null) {
+ DocValuesProducer dvProducer = dvProducersByField.get(field);
+ assert dvProducer != null;
dvs = dvProducer.getDocsWithField(fi);
dvFields.put(field, dvs);
}
@@ -444,13 +449,12 @@
return null;
}
- DocValuesProducer dvProducer = dvProducers.get(field);
- assert dvProducer != null;
-
Map<String,Object> dvFields = docValuesLocal.get();
BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field);
if (dvs == null) {
+ DocValuesProducer dvProducer = dvProducersByField.get(field);
+ assert dvProducer != null;
dvs = dvProducer.getBinary(fi);
dvFields.put(field, dvs);
}
@@ -466,13 +470,12 @@
return null;
}
- DocValuesProducer dvProducer = dvProducers.get(field);
- assert dvProducer != null;
-
Map<String,Object> dvFields = docValuesLocal.get();
SortedDocValues dvs = (SortedDocValues) dvFields.get(field);
if (dvs == null) {
+ DocValuesProducer dvProducer = dvProducersByField.get(field);
+ assert dvProducer != null;
dvs = dvProducer.getSorted(fi);
dvFields.put(field, dvs);
}
@@ -488,13 +491,12 @@
return null;
}
- DocValuesProducer dvProducer = dvProducers.get(field);
- assert dvProducer != null;
-
Map<String,Object> dvFields = docValuesLocal.get();
SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field);
if (dvs == null) {
+ DocValuesProducer dvProducer = dvProducersByField.get(field);
+ assert dvProducer != null;
dvs = dvProducer.getSortedSet(fi);
dvFields.put(field, dvs);
}
@@ -548,8 +550,10 @@
public long ramBytesUsed() {
ensureOpen();
long ramBytesUsed = 0;
- if (segDocValues != null) {
- ramBytesUsed += segDocValues.ramBytesUsed();
+ if (dvProducers != null) {
+ for (DocValuesProducer producer : dvProducers) {
+ ramBytesUsed += producer.ramBytesUsed();
+ }
}
if (core != null) {
ramBytesUsed += core.ramBytesUsed();
@@ -556,4 +560,35 @@
}
return ramBytesUsed;
}
+
+ @Override
+ public void validate() throws IOException {
+ ensureOpen();
+
+ // stored fields
+ getFieldsReader().validate();
+
+ // term vectors
+ TermVectorsReader termVectorsReader = getTermVectorsReader();
+ if (termVectorsReader != null) {
+ termVectorsReader.validate();
+ }
+
+ // terms/postings
+ if (core.fields != null) {
+ core.fields.validate();
+ }
+
+ // norms
+ if (core.normsProducer != null) {
+ core.normsProducer.validate();
+ }
+
+ // docvalues
+ if (dvProducers != null) {
+ for (DocValuesProducer producer : dvProducers) {
+ producer.validate();
+ }
+ }
+ }
}
Index: lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java (working copy)
@@ -239,4 +239,12 @@
// TODO: as this is a wrapper, should we really close the delegate?
in.close();
}
+
+ @Override
+ public void validate() throws IOException {
+ ensureOpen();
+ for (AtomicReaderContext ctx : in.leaves()) {
+ ctx.reader().validate();
+ }
+ }
}
Index: lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java (revision 0)
+++ lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java (working copy)
@@ -0,0 +1,84 @@
+package org.apache.lucene.store;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.zip.Checksum;
+
+/**
+ * Wraps another {@link Checksum} with an internal buffer
+ * to speed up checksum calculations.
+ */
+public class BufferedChecksum implements Checksum {
+ private final Checksum in;
+ private final byte buffer[];
+ private int upto;
+ /** Default buffer size: 256 */
+ public static final int DEFAULT_BUFFERSIZE = 256;
+
+ /** Create a new BufferedChecksum with {@link #DEFAULT_BUFFERSIZE} */
+ public BufferedChecksum(Checksum in) {
+ this(in, DEFAULT_BUFFERSIZE);
+ }
+
+ /** Create a new BufferedChecksum with the specified bufferSize */
+ public BufferedChecksum(Checksum in, int bufferSize) {
+ this.in = in;
+ this.buffer = new byte[bufferSize];
+ }
+
+ @Override
+ public void update(int b) {
+ if (upto == buffer.length) {
+ flush();
+ }
+ buffer[upto++] = (byte) b;
+ }
+
+ @Override
+ public void update(byte[] b, int off, int len) {
+ if (len >= buffer.length) {
+ flush();
+ in.update(b, off, len);
+ } else {
+ if (upto + len > buffer.length) {
+ flush();
+ }
+ System.arraycopy(b, off, buffer, upto, len);
+ upto += len;
+ }
+ }
+
+ @Override
+ public long getValue() {
+ flush();
+ return in.getValue();
+ }
+
+ @Override
+ public void reset() {
+ upto = 0;
+ in.reset();
+ }
+
+ private void flush() {
+ if (upto > 0) {
+ in.update(buffer, 0, upto);
+ }
+ upto = 0;
+ }
+}
Property changes on: lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
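
BufferedChecksum is a transparent decorator: it batches the single-byte update() calls that dominate header- and vint-heavy reads, and yields exactly the wrapped digest's value. A quick equivalence check:

    import java.nio.charset.StandardCharsets;
    import java.util.zip.CRC32;
    import java.util.zip.Checksum;

    import org.apache.lucene.store.BufferedChecksum;

    public class BufferedChecksumDemo {
      public static void main(String[] args) {
        byte[] data = "hello checksum".getBytes(StandardCharsets.UTF_8);

        Checksum plain = new CRC32();
        plain.update(data, 0, data.length);

        Checksum buffered = new BufferedChecksum(new CRC32());
        for (byte b : data) {
          buffered.update(b); // buffered internally, flushed by getValue()
        }

        // same digest either way
        System.out.println(plain.getValue() == buffered.getValue());
      }
    }
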
Index: lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java (revision 0)
+++ lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java (working copy)
@@ -0,0 +1,72 @@
+package org.apache.lucene.store;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+/**
+ * Simple implementation of {@link ChecksumIndexInput} that wraps
+ * another input and delegates calls.
+ */
+public class BufferedChecksumIndexInput extends ChecksumIndexInput {
+ final IndexInput main;
+ final Checksum digest;
+
+ /** Creates a new BufferedChecksumIndexInput */
+ public BufferedChecksumIndexInput(IndexInput main) {
+ super("BufferedChecksumIndexInput(" + main + ")");
+ this.main = main;
+ this.digest = new BufferedChecksum(new CRC32());
+ }
+
+ @Override
+ public byte readByte() throws IOException {
+ final byte b = main.readByte();
+ digest.update(b);
+ return b;
+ }
+
+ @Override
+ public void readBytes(byte[] b, int offset, int len)
+ throws IOException {
+ main.readBytes(b, offset, len);
+ digest.update(b, offset, len);
+ }
+
+ @Override
+ public long getChecksum() {
+ return digest.getValue();
+ }
+
+ @Override
+ public void close() throws IOException {
+ main.close();
+ }
+
+ @Override
+ public long getFilePointer() {
+ return main.getFilePointer();
+ }
+
+ @Override
+ public long length() {
+ return main.length();
+ }
+}
Property changes on: lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import java.util.zip.CRC32;
/** Base implementation class for buffered {@link IndexOutput}. */
public abstract class BufferedIndexOutput extends IndexOutput {
@@ -28,6 +29,7 @@
private final byte[] buffer;
private long bufferStart = 0; // position in file of buffer
private int bufferPosition = 0; // position in buffer
+ private final CRC32 crc = new CRC32();
/**
* Creates a new {@link BufferedIndexOutput} with the default buffer size
@@ -75,6 +77,7 @@
if (bufferPosition > 0)
flush();
// and write data at once
+ crc.update(b, offset, length);
flushBuffer(b, offset, length);
bufferStart += length;
} else {
@@ -99,6 +102,7 @@
@Override
public void flush() throws IOException {
+ crc.update(buffer, 0, bufferPosition);
flushBuffer(buffer, bufferPosition);
bufferStart += bufferPosition;
bufferPosition = 0;
@@ -141,4 +145,9 @@
return bufferSize;
}
+ @Override
+ public long getChecksum() throws IOException {
+ flush();
+ return crc.getValue();
+ }
}
Index: lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (working copy)
@@ -1,5 +1,7 @@
package org.apache.lucene.store;
+import java.io.IOException;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,61 +19,24 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.zip.CRC32;
-import java.util.zip.Checksum;
-
-/** Reads bytes through to a primary IndexInput, computing
- * checksum as it goes. Note that you cannot use seek().
- *
- * @lucene.internal
+/**
+ * Extension of IndexInput, computing checksum as it goes.
+ * Callers can retrieve the checksum via {@link #getChecksum()}.
*/
-public class ChecksumIndexInput extends IndexInput {
- IndexInput main;
- Checksum digest;
-
- public ChecksumIndexInput(IndexInput main) {
- super("ChecksumIndexInput(" + main + ")");
- this.main = main;
- digest = new CRC32();
- }
-
- @Override
- public byte readByte() throws IOException {
- final byte b = main.readByte();
- digest.update(b);
- return b;
- }
-
- @Override
- public void readBytes(byte[] b, int offset, int len)
- throws IOException {
- main.readBytes(b, offset, len);
- digest.update(b, offset, len);
- }
-
+public abstract class ChecksumIndexInput extends IndexInput {
- public long getChecksum() {
- return digest.getValue();
+ /** resourceDescription should be a non-null, opaque string
+ * describing this resource; it's returned from
+ * {@link #toString}. */
+ protected ChecksumIndexInput(String resourceDescription) {
+ super(resourceDescription);
}
- @Override
- public void close() throws IOException {
- main.close();
- }
+ /** Returns the current checksum value */
+ public abstract long getChecksum() throws IOException;
@Override
- public long getFilePointer() {
- return main.getFilePointer();
- }
-
- @Override
public void seek(long pos) {
throw new UnsupportedOperationException();
}
-
- @Override
- public long length() {
- return main.length();
- }
}
Index: lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java (working copy)
@@ -1,78 +0,0 @@
-package org.apache.lucene.store;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.zip.CRC32;
-import java.util.zip.Checksum;
-
-/** Writes bytes through to a primary IndexOutput, computing
- * checksum.
- *
- * @lucene.internal
- */
-public class ChecksumIndexOutput extends IndexOutput {
- IndexOutput main;
- Checksum digest;
-
- public ChecksumIndexOutput(IndexOutput main) {
- this.main = main;
- digest = new CRC32();
- }
-
- @Override
- public void writeByte(byte b) throws IOException {
- digest.update(b);
- main.writeByte(b);
- }
-
- @Override
- public void writeBytes(byte[] b, int offset, int length) throws IOException {
- digest.update(b, offset, length);
- main.writeBytes(b, offset, length);
- }
-
- public long getChecksum() {
- return digest.getValue();
- }
-
- @Override
- public void flush() throws IOException {
- main.flush();
- }
-
- @Override
- public void close() throws IOException {
- main.close();
- }
-
- @Override
- public long getFilePointer() {
- return main.getFilePointer();
- }
-
- /** writes the checksum */
- public void finishCommit() throws IOException {
- main.writeLong(getChecksum());
- }
-
- @Override
- public long length() throws IOException {
- return main.length();
- }
-}
Index: lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java (working copy)
@@ -52,14 +52,15 @@
* </ul>
* <p>Description:</p>
* <ul>
- * <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup></li>
+ * <li>Compound (.cfs) --&gt; Header, FileData <sup>FileCount</sup>, Footer</li>
* <li>Compound Entry Table (.cfe) --&gt; Header, FileCount, &lt;FileName,
* DataOffset, DataLength&gt; <sup>FileCount</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>FileCount --&gt; {@link DataOutput#writeVInt VInt}</li>
- * <li>DataOffset,DataLength --&gt; {@link DataOutput#writeLong UInt64}</li>
+ * <li>DataOffset,DataLength,Checksum --&gt; {@link DataOutput#writeLong UInt64}</li>
* <li>FileName --&gt; {@link DataOutput#writeString String}</li>
* <li>FileData --&gt; raw file data</li>
+ * <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
@@ -87,6 +88,7 @@
private static final Map<String,FileEntry> SENTINEL = Collections.emptyMap();
private final CompoundFileWriter writer;
private final IndexInputSlicer handle;
+ private int version;
/**
* Create a new CompoundFileDirectory.
@@ -120,15 +122,15 @@
}
/** Helper method that reads CFS entries from an input stream */
- private static final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException {
+ private final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException {
IOException priorE = null;
- IndexInput entriesStream = null;
+ ChecksumIndexInput entriesStream = null;
try {
final String entriesFileName = IndexFileNames.segmentFileName(
IndexFileNames.stripExtension(name), "",
IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
- entriesStream = dir.openInput(entriesFileName, IOContext.READONCE);
- CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_START);
+ entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE);
+ version = CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
final int numEntries = entriesStream.readVInt();
final Map<String, FileEntry> mapping = new HashMap<>(numEntries);
for (int i = 0; i < numEntries; i++) {
@@ -141,8 +143,10 @@
fileEntry.offset = entriesStream.readLong();
fileEntry.length = entriesStream.readLong();
}
- if (entriesStream.getFilePointer() != entriesStream.length()) {
- throw new CorruptIndexException("did not read all bytes from file \"" + entriesFileName + "\": read " + entriesStream.getFilePointer() + " vs size " + entriesStream.length() + " (resource: " + entriesStream + ")");
+ if (version >= CompoundFileWriter.VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(entriesStream);
+ } else {
+ CodecUtil.checkEOF(entriesStream);
}
return mapping;
} catch (IOException ioe) {
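
readEntries loses its static modifier so it can record the header version on the instance, and the end-of-file handling becomes version-gated: entry tables written with a footer get a full CRC comparison via CodecUtil.checkFooter, while older files keep the weaker did-we-consume-everything check (now CodecUtil.checkEOF). A sketch of that gate using plain JDK streams; the VERSION_CHECKSUM value and the footer layout here are illustrative:

    import java.io.DataInputStream;
    import java.io.IOException;

    class TailCheckSketch {
      static final int VERSION_CHECKSUM = 1; // illustrative version constant

      static void checkTail(DataInputStream in, int version, long computedCrc) throws IOException {
        if (version >= VERSION_CHECKSUM) {
          long stored = in.readLong(); // CRC recorded in the footer at write time
          if (stored != computedCrc) {
            throw new IOException("checksum mismatch: stored=" + stored + " computed=" + computedCrc);
          }
        } else {
          // pre-footer files: the best we can do is reject trailing garbage
          if (in.read() != -1) {
            throw new IOException("did not read all bytes from file");
          }
        }
      }
    }
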
Index: lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java (working copy)
@@ -54,7 +54,8 @@
// versioning for the .cfs file
static final String DATA_CODEC = "CompoundFileWriterData";
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_CHECKSUM = 1;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
// versioning for the .cfe file
static final String ENTRY_CODEC = "CompoundFileWriterEntries";
@@ -140,6 +141,7 @@
// open the compound stream
getOutput();
assert dataOut != null;
+ CodecUtil.writeFooter(dataOut);
} catch (IOException e) {
priorException = e;
} finally {
@@ -202,6 +204,7 @@
entryOut.writeLong(fe.offset);
entryOut.writeLong(fe.length);
}
+ CodecUtil.writeFooter(entryOut);
}
IndexOutput createOutput(String name, IOContext context) throws IOException {
@@ -342,6 +345,11 @@
writtenBytes += length;
delegate.writeBytes(b, offset, length);
}
+
+ @Override
+ public long getChecksum() throws IOException {
+ return delegate.getChecksum();
+ }
}
}
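
On the write side both streams now end in a footer: close() appends one to the .cfs data after the last file body, the entry-table writer appends one to the .cfe, and the inner output wrapper forwards getChecksum to its delegate so files written straight into the compound stream are covered too. The shape of such a footer, sketched below; the magic constant is made up, though the magic/algorithm-id/CRC layout follows the general pattern of CodecUtil.writeFooter:

    import java.io.DataOutputStream;
    import java.io.IOException;

    class FooterSketch {
      static final int FOOTER_MAGIC = 0x0F00BA55; // illustrative, not Lucene's constant

      // Append a fixed-size trailer carrying the running checksum of
      // every byte written before it.
      static void writeFooter(DataOutputStream out, long checksum) throws IOException {
        out.writeInt(FOOTER_MAGIC); // lets readers detect truncation quickly
        out.writeInt(0);            // checksum algorithm id (0 = CRC32 here)
        out.writeLong(checksum);
      }
    }
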
Index: lucene/core/src/java/org/apache/lucene/store/Directory.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/Directory.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/Directory.java (working copy)
@@ -100,8 +100,13 @@
* <p>Throws {@link FileNotFoundException} or {@link NoSuchFileException}
* if the file does not exist.
*/
- public abstract IndexInput openInput(String name, IOContext context) throws IOException;
+ public abstract IndexInput openInput(String name, IOContext context) throws IOException;
+ /** Returns a stream reading an existing file, computing checksum as it reads */
+ public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException {
+ return new BufferedChecksumIndexInput(openInput(name, context));
+ }
+
/** Construct a {@link Lock}.
* @param name the name of the lock file
*/
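
The new hook ships with a sensible default, wrapping whatever openInput returns in a BufferedChecksumIndexInput, so every existing Directory implementation gets checksummed reads without changes. A hedged usage sketch against the 4.x API; "SomeCodec" and the version bounds are placeholders:

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;

    class ChecksumReadSketch {
      static void readVerified(Directory dir, String fileName) throws IOException {
        ChecksumIndexInput in = dir.openChecksumInput(fileName, IOContext.READONCE);
        try {
          CodecUtil.checkHeader(in, "SomeCodec", 0, 0); // header bytes update the CRC too
          // ... read the payload; every byte read advances the running checksum ...
          CodecUtil.checkFooter(in); // compares in.getChecksum() against the stored value
        } finally {
          in.close();
        }
      }
    }
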
Index: lucene/core/src/java/org/apache/lucene/store/IndexOutput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/IndexOutput.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/IndexOutput.java (working copy)
@@ -43,6 +43,8 @@
*/
public abstract long getFilePointer();
+ /** Returns the current checksum of bytes written so far */
+ public abstract long getChecksum() throws IOException;
/** The number of bytes in the file. */
public abstract long length() throws IOException;

Index: lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java (revision 1583220)
+++ lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java (working copy)
@@ -18,6 +18,8 @@
*/
import java.io.IOException;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
/**
* A memory-resident {@link IndexOutput} implementation.
@@ -35,6 +37,8 @@
private int bufferPosition;
private long bufferStart;
private int bufferLength;
+
+ private Checksum crc = new BufferedChecksum(new CRC32());
/** Construct an empty output buffer. */
public RAMOutputStream() {
@@ -95,6 +99,7 @@
bufferStart = 0;
bufferLength = 0;
file.setLength(0);
+ crc.reset();
}
@Override
@@ -113,6 +118,7 @@
currentBufferIndex++;
switchCurrentBuffer();
}
+ crc.update(b);
currentBuffer[bufferPosition++] = b;
}
@@ -119,6 +125,7 @@
@Override
public void writeBytes(byte[] b, int offset, int len) throws IOException {
assert b != null;
+ crc.update(b, offset, len);
while (len > 0) {
if (bufferPosition == bufferLength) {
currentBufferIndex++;
@@ -165,5 +172,10 @@
/** Returns byte usage of all buffers. */
public long sizeInBytes() {
return (long) file.numBuffers() * (long) BUFFER_SIZE;
- }
+ }
+
+ @Override
+ public long getChecksum() throws IOException {
+ return crc.getValue();
+ }
}
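
RAMOutputStream keeps a BufferedChecksum-wrapped CRC32 running alongside its buffers, updating it in both write paths and resetting it in reset(), so even purely in-memory outputs can report a footer-ready checksum. A hedged sketch of the invariant this adds, in plain JDK types: the checksum an output reports must equal the CRC32 of exactly the bytes written so far.

    import java.io.ByteArrayOutputStream;
    import java.util.zip.CRC32;

    class RamChecksumSketch {
      private final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      private final CRC32 crc = new CRC32();

      void writeByte(byte b) {
        crc.update(b);   // digest first, mirroring RAMOutputStream
        bytes.write(b);
      }

      void reset() {
        bytes.reset();
        crc.reset();     // forgetting this would leak state across reuses
      }

      long getChecksum() {
        return crc.getValue();
      }
    }
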
Index: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java (revision 0)
+++ lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java (working copy)
@@ -0,0 +1,90 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.store.CompoundFileDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Test that the default codec puts CRC32 footers in all files.
+ */
+public class TestAllFilesHaveChecksumFooter extends LuceneTestCase {
+ public void test() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ conf.setCodec(new Lucene46Codec());
+ RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
+ Document doc = new Document();
+ // these fields should sometimes get term vectors, etc
+ Field idField = newStringField("id", "", Field.Store.NO);
+ Field bodyField = newTextField("body", "", Field.Store.NO);
+ Field dvField = new NumericDocValuesField("dv", 5);
+ doc.add(idField);
+ doc.add(bodyField);
+ doc.add(dvField);
+ for (int i = 0; i < 100; i++) {
+ idField.setStringValue(Integer.toString(i));
+ bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
+ riw.addDocument(doc);
+ if (random().nextInt(7) == 0) {
+ riw.commit();
+ }
+ if (random().nextInt(20) == 0) {
+ riw.deleteDocuments(new Term("id", Integer.toString(i)));
+ }
+ }
+ riw.close();
+ checkHeaders(dir);
+ dir.close();
+ }
+
+ private void checkHeaders(Directory dir) throws IOException {
+ for (String file : dir.listAll()) {
+ if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) {
+ CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false);
+ checkHeaders(cfsDir); // recurse into cfs
+ cfsDir.close();
+ }
+ IndexInput in = null;
+ boolean success = false;
+ try {
+ in = dir.openInput(file, newIOContext(random()));
+ CodecUtil.checksumEntireFile(in);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(in);
+ } else {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
+ }
+ }
+}
Property changes on: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (revision 1583220)
+++ lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@@ -39,14 +40,15 @@
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
conf.setCodec(new Lucene46Codec());
- // riw should sometimes create docvalues fields, etc
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
// these fields should sometimes get term vectors, etc
Field idField = newStringField("id", "", Field.Store.NO);
Field bodyField = newTextField("body", "", Field.Store.NO);
+ Field dvField = new NumericDocValuesField("dv", 5);
doc.add(idField);
doc.add(bodyField);
+ doc.add(dvField);
for (int i = 0; i < 100; i++) {
idField.setStringValue(Integer.toString(i));
bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
@@ -54,6 +56,10 @@
if (random().nextInt(7) == 0) {
riw.commit();
}
+ // TODO: we should make a new format with a clean header...
+ // if (random().nextInt(20) == 0) {
+ // riw.deleteDocuments(new Term("id", Integer.toString(i)));
+ // }
}
riw.close();
checkHeaders(dir);
Index: lucene/core/src/test/org/apache/lucene/index/TestDoc.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestDoc.java (revision 1583220)
+++ lucene/core/src/test/org/apache/lucene/index/TestDoc.java (working copy)
@@ -221,7 +221,7 @@
SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
si, InfoStream.getDefault(), trackingDir,
- MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
+ MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true);
MergeState mergeState = merger.merge();
r1.close();
Index: lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1583220)
+++ lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy)
@@ -83,7 +83,7 @@
SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(reader1, reader2),
si, InfoStream.getDefault(), mergedDir,
- MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()));
+ MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()), true);
MergeState mergeState = merger.merge();
int docsMerged = mergeState.segmentInfo.getDocCount();
assertTrue(docsMerged == 2);
Index: lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java (revision 0)
+++ lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java (working copy)
@@ -0,0 +1,68 @@
+package org.apache.lucene.store;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestBufferedChecksum extends LuceneTestCase {
+
+ public void testSimple() {
+ Checksum c = new BufferedChecksum(new CRC32());
+ c.update(1);
+ c.update(2);
+ c.update(3);
+ assertEquals(1438416925L, c.getValue());
+ }
+
+ public void testRandom() {
+ Checksum c1 = new CRC32();
+ Checksum c2 = new BufferedChecksum(new CRC32());
+ int iterations = atLeast(10000);
+ for (int i = 0; i < iterations; i++) {
+ switch(random().nextInt(4)) {
+ case 0:
+ // update(byte[], int, int)
+ int length = random().nextInt(1024);
+ byte bytes[] = new byte[length];
+ random().nextBytes(bytes);
+ c1.update(bytes, 0, bytes.length);
+ c2.update(bytes, 0, bytes.length);
+ break;
+ case 1:
+ // update(int)
+ int b = random().nextInt(256);
+ c1.update(b);
+ c2.update(b);
+ break;
+ case 2:
+ // reset()
+ c1.reset();
+ c2.reset();
+ break;
+ case 3:
+ // getValue()
+ assertEquals(c1.getValue(), c2.getValue());
+ break;
+ }
+ }
+ assertEquals(c1.getValue(), c2.getValue());
+ }
+}
Property changes on: lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
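
The randomized test above pins down BufferedChecksum's contract: it must be bit-for-bit identical to a bare CRC32 across every interleaving of update, reset, and getValue; the buffering is purely a performance detail, batching single-byte updates into array updates before they hit the wrapped digest. The idea, sketched; this is illustrative, not the actual class:

    import java.util.zip.Checksum;

    class BufferedChecksumSketch {
      private final Checksum in;
      private final byte[] buffer = new byte[256];
      private int upto;

      BufferedChecksumSketch(Checksum in) {
        this.in = in;
      }

      void update(int b) {
        if (upto == buffer.length) {
          flush();
        }
        buffer[upto++] = (byte) b;
      }

      long getValue() {
        flush();           // drain pending bytes before reporting
        return in.getValue();
      }

      void reset() {
        upto = 0;          // pending bytes are simply forgotten
        in.reset();
      }

      private void flush() {
        if (upto > 0) {
          in.update(buffer, 0, upto); // one array update instead of N byte updates
          upto = 0;
        }
      }
    }
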
Index: lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java (revision 1583220)
+++ lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java (working copy)
@@ -30,12 +30,13 @@
public void testOverrides() throws Exception {
// verify that all methods of Directory are overridden by FilterDirectory,
// except those under the 'exclude' list
- Set<String> exclude = new HashSet<>();
- exclude.add("copy");
- exclude.add("createSlicer");
+ Set<Method> exclude = new HashSet<>();
+ exclude.add(Directory.class.getMethod("copy", Directory.class, String.class, String.class, IOContext.class));
+ exclude.add(Directory.class.getMethod("createSlicer", String.class, IOContext.class));
+ exclude.add(Directory.class.getMethod("openChecksumInput", String.class, IOContext.class));
for (Method m : FilterDirectory.class.getMethods()) {
if (m.getDeclaringClass() == Directory.class) {
- assertTrue("method " + m.getName() + " not overridden!", exclude.contains(m.getName()));
+ assertTrue("method " + m.getName() + " not overridden!", exclude.contains(m));
}
}
}
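
The exclusion list switches from method names to java.lang.reflect.Method objects: name matching cannot tell overloads apart, so a single excluded name could hide a whole family of methods, while Method equality pins the exact signature. The pattern, sketched with stand-in classes:

    import java.lang.reflect.Method;
    import java.util.Set;

    class OverrideCheckSketch {
      // Fails if `filter` inherits any of `base`'s public methods rather
      // than overriding them, except those explicitly excluded.
      static void assertAllOverridden(Class<?> base, Class<?> filter, Set<Method> exclude) {
        for (Method m : filter.getMethods()) {
          if (m.getDeclaringClass() == base && !exclude.contains(m)) {
            throw new AssertionError("method " + m + " not overridden!");
          }
        }
      }
    }
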
Index: lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java (revision 1583220)
+++ lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java (working copy)
@@ -167,6 +167,7 @@
@Override public void flush() throws IOException { io.flush(); }
@Override public long getFilePointer() { return io.getFilePointer(); }
@Override public long length() throws IOException { return io.length(); }
+ @Override public long getChecksum() throws IOException { return io.getChecksum(); }
}
}
Index: lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
===================================================================
--- lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1583220)
+++ lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy)
@@ -794,6 +794,11 @@
return null;
}
+ @Override
+ public void validate() throws IOException {
+ // no-op
+ }
+
private class MemoryFields extends Fields {
@Override
public Iterator<String> iterator() {
Index: lucene/misc/build.xml
===================================================================
--- lucene/misc/build.xml (revision 1583220)
+++ lucene/misc/build.xml (working copy)
@@ -44,7 +44,7 @@
<target name="build-native-unix" depends="install-cpptasks">
<mkdir dir="${common.build.dir}/native"/>
- <cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
+ <cc outtype="shared" name="c++" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
<includepath>
<pathelement location="${java.home}/../include"/>
@@ -54,7 +54,7 @@
</includepath>
<compilerarg value="-fPIC" />
- <linkerarg value="-lstdc++" />
+ <syslibset libs="stdc++"/>
</cc>
</target>
Index: lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java (revision 1583220)
+++ lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java (working copy)
@@ -240,6 +240,11 @@
public long length() {
return fileLength + bufferPos;
}
+
+ @Override
+ public long getChecksum() throws IOException {
+ throw new UnsupportedOperationException("this directory currently does not work at all!");
+ }
@Override
public void close() throws IOException {
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java (working copy)
@@ -306,5 +306,10 @@
public long ramBytesUsed() {
return in.ramBytesUsed();
}
+
+ @Override
+ public void validate() throws IOException {
+ in.validate();
+ }
}
}
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (working copy)
@@ -89,6 +89,11 @@
public long ramBytesUsed() {
return in.ramBytesUsed();
}
+
+ @Override
+ public void validate() throws IOException {
+ in.validate();
+ }
}
static class AssertingFieldsConsumer extends FieldsConsumer {
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (working copy)
@@ -76,6 +76,11 @@
public long ramBytesUsed() {
return in.ramBytesUsed();
}
+
+ @Override
+ public void validate() throws IOException {
+ in.validate();
+ }
}
enum Status {
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (working copy)
@@ -75,6 +75,11 @@
public long ramBytesUsed() {
return in.ramBytesUsed();
}
+
+ @Override
+ public void validate() throws IOException {
+ in.validate();
+ }
}
enum Status {
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (working copy)
@@ -46,7 +46,7 @@
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
import org.apache.lucene.util.packed.PackedInts;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT;
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_GCD_COMPRESSION;
import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BYTES;
import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.NUMBER;
@@ -71,10 +71,11 @@
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.createOutput(dataName, state.context);
- CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
+ // this writer writes the format 4.2 did!
+ CodecUtil.writeHeader(data, dataCodec, VERSION_GCD_COMPRESSION);
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
meta = state.directory.createOutput(metaName, state.context);
- CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
+ CodecUtil.writeHeader(meta, metaCodec, VERSION_GCD_COMPRESSION);
success = true;
} finally {
if (!success) {
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (working copy)
@@ -94,6 +94,9 @@
}
return sizeInBytes;
}
+
+ @Override
+ public void validate() throws IOException {}
}
static class RAMField extends Terms {
Index: lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java (working copy)
@@ -166,6 +166,11 @@
}
@Override
+ public long getChecksum() throws IOException {
+ return delegate.getChecksum();
+ }
+
+ @Override
public String toString() {
return "MockIndexOutputWrapper(" + delegate + ")";
}
Index: lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (working copy)
@@ -810,6 +810,7 @@
}
c.setUseCompoundFile(r.nextBoolean());
c.setReaderPooling(r.nextBoolean());
+ c.setValidateAtMerge(r.nextBoolean());
return c;
}
Index: lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (working copy)
@@ -243,6 +243,7 @@
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
PrintStream infoStream = new PrintStream(bos, false, "UTF-8");
+ reader.validate();
FieldNormStatus fieldNormStatus = CheckIndex.testFieldNorms(reader, infoStream);
TermIndexStatus termIndexStatus = CheckIndex.testPostings(reader, infoStream);
StoredFieldStatus storedFieldStatus = CheckIndex.testStoredFields(reader, infoStream);
Index: lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java (revision 1583220)
+++ lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java (working copy)
@@ -145,4 +145,9 @@
public void copyBytes(DataInput input, long numBytes) throws IOException {
delegate.copyBytes(input, numBytes);
}
+
+ @Override
+ public long getChecksum() throws IOException {
+ return delegate.getChecksum();
+ }
}
Index: solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java
===================================================================
--- solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java (revision 1583220)
+++ solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java (working copy)
@@ -88,5 +88,10 @@
offset += len;
}
}
-
+
+ @Override
+ public long getChecksum() throws IOException {
+ flush();
+ return dest.getChecksum();
+ }
}
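
CachedIndexOutput's override is the one non-trivial delegation in this batch: it flushes before asking the delegate, because a caching wrapper may still hold bytes the delegate (and therefore the delegate's digest) has never seen. The hazard, sketched; BufferingOutput is illustrative, not a Solr class:

    import java.io.IOException;

    abstract class BufferingOutput {
      protected final byte[] buffer = new byte[8192];
      protected int buffered;

      abstract void flushBufferToDelegate() throws IOException;
      abstract long delegateChecksum() throws IOException;

      final long getChecksum() throws IOException {
        if (buffered > 0) {
          flushBufferToDelegate(); // push pending bytes through the digest first
          buffered = 0;
        }
        return delegateChecksum();
      }
    }
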
Index: solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java
===================================================================
--- solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java (revision 1583220)
+++ solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java (working copy)
@@ -66,5 +66,9 @@
length = pos;
}
}
-
+
+ @Override
+ public long getChecksum() throws IOException {
+ return 0; // we don't write anything.
+ }
}
Index: solr/core/src/test/org/apache/solr/search/TestDocSet.java
===================================================================
--- solr/core/src/test/org/apache/solr/search/TestDocSet.java (revision 1583220)
+++ solr/core/src/test/org/apache/solr/search/TestDocSet.java (working copy)
@@ -418,6 +418,10 @@
@Override
public void document(int doc, StoredFieldVisitor visitor) {
}
+
+ @Override
+ public void validate() throws IOException {
+ }
};
}