| Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (working copy) |
| @@ -177,7 +177,10 @@ |
| } |
| |
| private void seekDir(IndexInput input, long dirOffset) throws IOException { |
| - if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) { |
| + if (version >= BlockTermsWriter.VERSION_CHECKSUM) { |
| + input.seek(input.length() - CodecUtil.footerLength() - 8); |
| + dirOffset = input.readLong(); |
| + } else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) { |
| input.seek(input.length() - 8); |
| dirOffset = input.readLong(); |
| } |
| @@ -863,4 +866,14 @@ |
| sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0; |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + // verify terms |
| + if (version >= BlockTermsWriter.VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(in); |
| + } |
| + // verify postings |
| + postingsReader.validate(); |
| + } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (working copy) |
| @@ -63,12 +63,13 @@ |
| public static final int VERSION_START = 0; |
| public static final int VERSION_APPEND_ONLY = 1; |
| public static final int VERSION_META_ARRAY = 2; |
| - public static final int VERSION_CURRENT = VERSION_META_ARRAY; |
| + public static final int VERSION_CHECKSUM = 3; |
| + public static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| /** Extension of terms file */ |
| static final String TERMS_EXTENSION = "tib"; |
| |
| - protected final IndexOutput out; |
| + protected IndexOutput out; |
| final PostingsWriterBase postingsWriter; |
| final FieldInfos fieldInfos; |
| FieldInfo currentField; |
| @@ -176,26 +177,30 @@ |
| } |
| |
| public void close() throws IOException { |
| - try { |
| - final long dirStart = out.getFilePointer(); |
| - |
| - out.writeVInt(fields.size()); |
| - for(FieldMetaData field : fields) { |
| - out.writeVInt(field.fieldInfo.number); |
| - out.writeVLong(field.numTerms); |
| - out.writeVLong(field.termsStartPointer); |
| - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { |
| - out.writeVLong(field.sumTotalTermFreq); |
| + if (out != null) { |
| + try { |
| + final long dirStart = out.getFilePointer(); |
| + |
| + out.writeVInt(fields.size()); |
| + for(FieldMetaData field : fields) { |
| + out.writeVInt(field.fieldInfo.number); |
| + out.writeVLong(field.numTerms); |
| + out.writeVLong(field.termsStartPointer); |
| + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { |
| + out.writeVLong(field.sumTotalTermFreq); |
| + } |
| + out.writeVLong(field.sumDocFreq); |
| + out.writeVInt(field.docCount); |
| + if (VERSION_CURRENT >= VERSION_META_ARRAY) { |
| + out.writeVInt(field.longsSize); |
| + } |
| } |
| - out.writeVLong(field.sumDocFreq); |
| - out.writeVInt(field.docCount); |
| - if (VERSION_CURRENT >= VERSION_META_ARRAY) { |
| - out.writeVInt(field.longsSize); |
| - } |
| + writeTrailer(dirStart); |
| + CodecUtil.writeFooter(out); |
| + } finally { |
| + IOUtils.close(out, postingsWriter, termsIndexWriter); |
| + out = null; |
| } |
| - writeTrailer(dirStart); |
| - } finally { |
| - IOUtils.close(out, postingsWriter, termsIndexWriter); |
| } |
| } |
| |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (working copy) |
| @@ -66,6 +66,8 @@ |
| // start of the field info data |
| private long dirOffset; |
| |
| + private int version; |
| + |
| public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context) |
| throws IOException { |
| |
| @@ -78,6 +80,11 @@ |
| try { |
| |
| readHeader(in); |
| + |
| + if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(in); |
| + } |
| + |
| indexInterval = in.readVInt(); |
| if (indexInterval < 1) { |
| throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")"); |
| @@ -124,7 +131,7 @@ |
| } |
| |
| private void readHeader(IndexInput input) throws IOException { |
| - CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME, |
| + version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME, |
| FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT); |
| } |
| |
| @@ -273,7 +280,11 @@ |
| public void close() throws IOException {} |
| |
| private void seekDir(IndexInput input, long dirOffset) throws IOException { |
| - input.seek(input.length() - 8); |
| + if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { |
| + input.seek(input.length() - CodecUtil.footerLength() - 8); |
| + } else { |
| + input.seek(input.length() - 8); |
| + } |
| dirOffset = input.readLong(); |
| input.seek(dirOffset); |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (working copy) |
| @@ -26,7 +26,6 @@ |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| -import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; |
| import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; |
| import org.apache.lucene.util.packed.PackedInts; |
| |
| @@ -43,7 +42,7 @@ |
| * |
| * @lucene.experimental */ |
| public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { |
| - protected final IndexOutput out; |
| + protected IndexOutput out; |
| |
| /** Extension of terms index file */ |
| static final String TERMS_INDEX_EXTENSION = "tii"; |
| @@ -52,7 +51,8 @@ |
| final static int VERSION_START = 0; |
| final static int VERSION_APPEND_ONLY = 1; |
| final static int VERSION_MONOTONIC_ADDRESSING = 2; |
| - final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING; |
| + final static int VERSION_CHECKSUM = 3; |
| + final static int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| final static int BLOCKSIZE = 4096; |
| final private int termIndexInterval; |
| @@ -207,39 +207,43 @@ |
| |
| @Override |
| public void close() throws IOException { |
| - boolean success = false; |
| - try { |
| - final long dirStart = out.getFilePointer(); |
| - final int fieldCount = fields.size(); |
| - |
| - int nonNullFieldCount = 0; |
| - for(int i=0;i<fieldCount;i++) { |
| - SimpleFieldWriter field = fields.get(i); |
| - if (field.numIndexTerms > 0) { |
| - nonNullFieldCount++; |
| + if (out != null) { |
| + boolean success = false; |
| + try { |
| + final long dirStart = out.getFilePointer(); |
| + final int fieldCount = fields.size(); |
| + |
| + int nonNullFieldCount = 0; |
| + for(int i=0;i<fieldCount;i++) { |
| + SimpleFieldWriter field = fields.get(i); |
| + if (field.numIndexTerms > 0) { |
| + nonNullFieldCount++; |
| + } |
| } |
| - } |
| - |
| - out.writeVInt(nonNullFieldCount); |
| - for(int i=0;i<fieldCount;i++) { |
| - SimpleFieldWriter field = fields.get(i); |
| - if (field.numIndexTerms > 0) { |
| - out.writeVInt(field.fieldInfo.number); |
| - out.writeVInt(field.numIndexTerms); |
| - out.writeVLong(field.termsStart); |
| - out.writeVLong(field.indexStart); |
| - out.writeVLong(field.packedIndexStart); |
| - out.writeVLong(field.packedOffsetsStart); |
| + |
| + out.writeVInt(nonNullFieldCount); |
| + for(int i=0;i<fieldCount;i++) { |
| + SimpleFieldWriter field = fields.get(i); |
| + if (field.numIndexTerms > 0) { |
| + out.writeVInt(field.fieldInfo.number); |
| + out.writeVInt(field.numIndexTerms); |
| + out.writeVLong(field.termsStart); |
| + out.writeVLong(field.indexStart); |
| + out.writeVLong(field.packedIndexStart); |
| + out.writeVLong(field.packedOffsetsStart); |
| + } |
| } |
| + writeTrailer(dirStart); |
| + CodecUtil.writeFooter(out); |
| + success = true; |
| + } finally { |
| + if (success) { |
| + IOUtils.close(out); |
| + } else { |
| + IOUtils.closeWhileHandlingException(out); |
| + } |
| + out = null; |
| } |
| - writeTrailer(dirStart); |
| - success = true; |
| - } finally { |
| - if (success) { |
| - IOUtils.close(out); |
| - } else { |
| - IOUtils.closeWhileHandlingException(out); |
| - } |
| } |
| } |
| |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (working copy) |
| @@ -62,6 +62,10 @@ |
| try { |
| |
| version = readHeader(in); |
| + |
| + if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(in); |
| + } |
| |
| seekDir(in, dirOffset); |
| |
| @@ -190,7 +194,10 @@ |
| public void close() throws IOException {} |
| |
| private void seekDir(IndexInput input, long dirOffset) throws IOException { |
| - if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { |
| + if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { |
| + input.seek(input.length() - CodecUtil.footerLength() - 8); |
| + dirOffset = input.readLong(); |
| + } else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { |
| input.seek(input.length() - 8); |
| dirOffset = input.readLong(); |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (working copy) |
| @@ -45,7 +45,7 @@ |
| * |
| * @lucene.experimental */ |
| public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { |
| - protected final IndexOutput out; |
| + protected IndexOutput out; |
| |
| /** Extension of terms index file */ |
| static final String TERMS_INDEX_EXTENSION = "tiv"; |
| @@ -53,7 +53,8 @@ |
| final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX"; |
| final static int VERSION_START = 0; |
| final static int VERSION_APPEND_ONLY = 1; |
| - final static int VERSION_CURRENT = VERSION_APPEND_ONLY; |
| + final static int VERSION_CHECKSUM = 2; |
| + final static int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| private final List<FSTFieldWriter> fields = new ArrayList<>(); |
| |
| @@ -290,31 +291,35 @@ |
| |
| @Override |
| public void close() throws IOException { |
| - try { |
| - final long dirStart = out.getFilePointer(); |
| - final int fieldCount = fields.size(); |
| - |
| - int nonNullFieldCount = 0; |
| - for(int i=0;i<fieldCount;i++) { |
| - FSTFieldWriter field = fields.get(i); |
| - if (field.fst != null) { |
| - nonNullFieldCount++; |
| + if (out != null) { |
| + try { |
| + final long dirStart = out.getFilePointer(); |
| + final int fieldCount = fields.size(); |
| + |
| + int nonNullFieldCount = 0; |
| + for(int i=0;i<fieldCount;i++) { |
| + FSTFieldWriter field = fields.get(i); |
| + if (field.fst != null) { |
| + nonNullFieldCount++; |
| + } |
| + } |
| + |
| + out.writeVInt(nonNullFieldCount); |
| + for(int i=0;i<fieldCount;i++) { |
| + FSTFieldWriter field = fields.get(i); |
| + if (field.fst != null) { |
| + out.writeVInt(field.fieldInfo.number); |
| + out.writeVLong(field.indexStart); |
| + } |
| + } |
| + writeTrailer(dirStart); |
| + CodecUtil.writeFooter(out); |
| + } finally { |
| + out.close(); |
| + out = null; |
| } |
| } |
| - |
| - out.writeVInt(nonNullFieldCount); |
| - for(int i=0;i<fieldCount;i++) { |
| - FSTFieldWriter field = fields.get(i); |
| - if (field.fst != null) { |
| - out.writeVInt(field.fieldInfo.number); |
| - out.writeVLong(field.indexStart); |
| - } |
| - } |
| - writeTrailer(dirStart); |
| - } finally { |
| - out.close(); |
| } |
| - } |
| |
| private void writeTrailer(long dirStart) throws IOException { |
| out.writeLong(dirStart); |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (working copy) |
| @@ -39,8 +39,8 @@ |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataOutput; |
| -import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -66,7 +66,7 @@ |
| * </p> |
| * <ul> |
| * <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName, |
| - * NumFilteredFields, Filter<sup>NumFilteredFields</sup></li> |
| + * NumFilteredFields, Filter<sup>NumFilteredFields</sup>, Footer</li> |
| * <li>Filter --> FieldNumber, FuzzySet</li> |
| * <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| @@ -75,6 +75,7 @@ |
| * <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li> |
| * <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the |
| * field in this segment</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * @lucene.experimental |
| */ |
| @@ -81,7 +82,9 @@ |
| public final class BloomFilteringPostingsFormat extends PostingsFormat { |
| |
| public static final String BLOOM_CODEC_NAME = "BloomFilter"; |
| - public static final int BLOOM_CODEC_VERSION = 1; |
| + public static final int VERSION_START = 1; |
| + public static final int VERSION_CHECKSUM = 2; |
| + public static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| /** Extension of Bloom Filters file */ |
| static final String BLOOM_EXTENSION = "blm"; |
| @@ -157,12 +160,11 @@ |
| |
| String bloomFileName = IndexFileNames.segmentFileName( |
| state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); |
| - IndexInput bloomIn = null; |
| + ChecksumIndexInput bloomIn = null; |
| boolean success = false; |
| try { |
| - bloomIn = state.directory.openInput(bloomFileName, state.context); |
| - CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, |
| - BLOOM_CODEC_VERSION); |
| + bloomIn = state.directory.openChecksumInput(bloomFileName, state.context); |
| + int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT); |
| // // Load the hash function used in the BloomFilter |
| // hashFunction = HashFunction.forName(bloomIn.readString()); |
| // Load the delegate postings format |
| @@ -178,6 +180,11 @@ |
| FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum); |
| bloomsByFieldName.put(fieldInfo.name, bloom); |
| } |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(bloomIn); |
| + } else { |
| + CodecUtil.checkEOF(bloomIn); |
| + } |
| IOUtils.close(bloomIn); |
| success = true; |
| } finally { |
| @@ -390,6 +397,11 @@ |
| } |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + delegateFieldsProducer.validate(); |
| + } |
| } |
| |
| class BloomFilteredFieldsConsumer extends FieldsConsumer { |
| @@ -466,10 +478,8 @@ |
| state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); |
| IndexOutput bloomOutput = null; |
| try { |
| - bloomOutput = state.directory |
| - .createOutput(bloomFileName, state.context); |
| - CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, |
| - BLOOM_CODEC_VERSION); |
| + bloomOutput = state.directory.createOutput(bloomFileName, state.context); |
| + CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT); |
| // remember the name of the postings format we will delegate to |
| bloomOutput.writeString(delegatePostingsFormat.getName()); |
| |
| @@ -481,6 +491,7 @@ |
| bloomOutput.writeInt(fieldInfo.number); |
| saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo); |
| } |
| + CodecUtil.writeFooter(bloomOutput); |
| } finally { |
| IOUtils.close(bloomOutput); |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (working copy) |
| @@ -40,7 +40,7 @@ |
| */ |
| |
| class DirectDocValuesConsumer extends DocValuesConsumer { |
| - final IndexOutput data, meta; |
| + IndexOutput data, meta; |
| final int maxDoc; |
| |
| DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { |
| @@ -142,7 +142,11 @@ |
| try { |
| if (meta != null) { |
| meta.writeVInt(-1); // write EOF marker |
| + CodecUtil.writeFooter(meta); // write checksum |
| } |
| + if (data != null) { |
| + CodecUtil.writeFooter(data); |
| + } |
| success = true; |
| } finally { |
| if (success) { |
| @@ -150,6 +154,7 @@ |
| } else { |
| IOUtils.closeWhileHandlingException(data, meta); |
| } |
| + data = meta = null; |
| } |
| } |
| |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (working copy) |
| @@ -33,6 +33,7 @@ |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -65,6 +66,7 @@ |
| |
| private final int maxDoc; |
| private final AtomicLong ramBytesUsed; |
| + private final int version; |
| |
| static final byte NUMBER = 0; |
| static final byte BYTES = 1; |
| @@ -72,16 +74,16 @@ |
| static final byte SORTED_SET = 3; |
| |
| static final int VERSION_START = 0; |
| - static final int VERSION_CURRENT = VERSION_START; |
| + static final int VERSION_CHECKSUM = 1; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { |
| maxDoc = state.segmentInfo.getDocCount(); |
| String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); |
| // read in the entries from the metadata file. |
| - IndexInput in = state.directory.openInput(metaName, state.context); |
| + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); |
| ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); |
| boolean success = false; |
| - final int version; |
| try { |
| version = CodecUtil.checkHeader(in, metaCodec, |
| VERSION_START, |
| @@ -88,6 +90,11 @@ |
| VERSION_CURRENT); |
| readFields(in); |
| |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(in); |
| + } else { |
| + CodecUtil.checkEOF(in); |
| + } |
| success = true; |
| } finally { |
| if (success) { |
| @@ -186,6 +193,13 @@ |
| } |
| |
| @Override |
| + public void validate() throws IOException { |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(data); |
| + } |
| + } |
| + |
| + @Override |
| public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException { |
| NumericDocValues instance = numericInstances.get(field.number); |
| if (instance == null) { |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (working copy) |
| @@ -109,6 +109,7 @@ |
| if (state.context.context != IOContext.Context.MERGE) { |
| FieldsProducer loadedPostings; |
| try { |
| + postings.validate(); |
| loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff); |
| } finally { |
| postings.close(); |
| @@ -157,6 +158,12 @@ |
| } |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + // if we read entirely into ram, we already validated. |
| +  // otherwise we returned the raw postings reader, which validates on its own |
| + } |
| } |
| |
| private final static class DirectField extends Terms { |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (working copy) |
| @@ -38,6 +38,7 @@ |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.automaton.ByteRunAutomaton; |
| @@ -56,7 +57,6 @@ |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.codecs.CodecUtil; |
| -import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader; |
| |
| /** |
| * FST-based terms dictionary reader. |
| @@ -63,7 +63,7 @@ |
| * |
| * The FST index maps each term and its ord, and during seek |
| * the ord is used fetch metadata from a single block. |
| - * The term dictionary is fully memeory resident. |
| + * The term dictionary is fully memory resident. |
| * |
| * @lucene.experimental |
| */ |
| @@ -71,8 +71,7 @@ |
| static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL; |
| final TreeMap<String, TermsReader> fields = new TreeMap<>(); |
| final PostingsReaderBase postingsReader; |
| - IndexInput indexIn = null; |
| - IndexInput blockIn = null; |
| + int version; |
| //static final boolean TEST = false; |
| |
| public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { |
| @@ -80,11 +79,18 @@ |
| final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION); |
| |
| this.postingsReader = postingsReader; |
| + ChecksumIndexInput indexIn = null; |
| + IndexInput blockIn = null; |
| + boolean success = false; |
| try { |
| - this.indexIn = state.directory.openInput(termsIndexFileName, state.context); |
| - this.blockIn = state.directory.openInput(termsBlockFileName, state.context); |
| - readHeader(indexIn); |
| + indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context); |
| + blockIn = state.directory.openInput(termsBlockFileName, state.context); |
| + version = readHeader(indexIn); |
| readHeader(blockIn); |
| + if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(blockIn); |
| + } |
| + |
| this.postingsReader.init(blockIn); |
| seekDir(blockIn); |
| |
| @@ -100,12 +106,22 @@ |
| int longsSize = blockIn.readVInt(); |
| FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton()); |
| |
| - TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); |
| + TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); |
| TermsReader previous = fields.put(fieldInfo.name, current); |
| - checkFieldSummary(state.segmentInfo, current, previous); |
| + checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous); |
| } |
| + if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(indexIn); |
| + } else { |
| + CodecUtil.checkEOF(indexIn); |
| + } |
| + success = true; |
| } finally { |
| - IOUtils.closeWhileHandlingException(indexIn, blockIn); |
| + if (success) { |
| + IOUtils.close(indexIn, blockIn); |
| + } else { |
| + IOUtils.closeWhileHandlingException(indexIn, blockIn); |
| + } |
| } |
| } |
| |
| @@ -115,10 +131,14 @@ |
| FSTOrdTermsWriter.TERMS_VERSION_CURRENT); |
| } |
| private void seekDir(IndexInput in) throws IOException { |
| - in.seek(in.length() - 8); |
| + if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { |
| + in.seek(in.length() - CodecUtil.footerLength() - 8); |
| + } else { |
| + in.seek(in.length() - 8); |
| + } |
| in.seek(in.readLong()); |
| } |
| - private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException { |
| + private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException { |
| // #docs with field must be <= #docs |
| if (field.docCount < 0 || field.docCount > info.getDocCount()) { |
| throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")"); |
| @@ -176,7 +196,7 @@ |
| final byte[] metaLongsBlock; |
| final byte[] metaBytesBlock; |
| |
| - TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException { |
| + TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException { |
| this.fieldInfo = fieldInfo; |
| this.numTerms = numTerms; |
| this.sumTotalTermFreq = sumTotalTermFreq; |
| @@ -819,4 +839,9 @@ |
| } |
| return ramBytesUsed; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + postingsReader.validate(); |
| + } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (working copy) |
| @@ -73,9 +73,10 @@ |
| * </p> |
| * |
| * <ul> |
| - * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li> |
| + * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li> |
| * <li>TermFST --> {@link FST FST<long>}</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * |
| * <p>Notes:</p> |
| @@ -103,7 +104,7 @@ |
| * <ul> |
| * <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li> |
| * <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq, |
| - * DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li> |
| + * DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li> |
| * |
| * <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength, |
| * SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li> |
| @@ -119,6 +120,7 @@ |
| * <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength, |
| * StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq, |
| * LongDelta,--> {@link DataOutput#writeVLong VLong}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes: </p> |
| * <ul> |
| @@ -148,7 +150,8 @@ |
| static final String TERMS_BLOCK_EXTENSION = "tbk"; |
| static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT"; |
| public static final int TERMS_VERSION_START = 0; |
| - public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START; |
| + public static final int TERMS_VERSION_CHECKSUM = 1; |
| + public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM; |
| public static final int SKIP_INTERVAL = 8; |
| |
| final PostingsWriterBase postingsWriter; |
| @@ -218,36 +221,41 @@ |
| } |
| |
| public void close() throws IOException { |
| - IOException ioe = null; |
| - try { |
| - final long blockDirStart = blockOut.getFilePointer(); |
| - |
| - // write field summary |
| - blockOut.writeVInt(fields.size()); |
| - for (FieldMetaData field : fields) { |
| - blockOut.writeVInt(field.fieldInfo.number); |
| - blockOut.writeVLong(field.numTerms); |
| - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { |
| - blockOut.writeVLong(field.sumTotalTermFreq); |
| + if (blockOut != null) { |
| + IOException ioe = null; |
| + try { |
| + final long blockDirStart = blockOut.getFilePointer(); |
| + |
| + // write field summary |
| + blockOut.writeVInt(fields.size()); |
| + for (FieldMetaData field : fields) { |
| + blockOut.writeVInt(field.fieldInfo.number); |
| + blockOut.writeVLong(field.numTerms); |
| + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { |
| + blockOut.writeVLong(field.sumTotalTermFreq); |
| + } |
| + blockOut.writeVLong(field.sumDocFreq); |
| + blockOut.writeVInt(field.docCount); |
| + blockOut.writeVInt(field.longsSize); |
| + blockOut.writeVLong(field.statsOut.getFilePointer()); |
| + blockOut.writeVLong(field.metaLongsOut.getFilePointer()); |
| + blockOut.writeVLong(field.metaBytesOut.getFilePointer()); |
| + |
| + field.skipOut.writeTo(blockOut); |
| + field.statsOut.writeTo(blockOut); |
| + field.metaLongsOut.writeTo(blockOut); |
| + field.metaBytesOut.writeTo(blockOut); |
| + field.dict.save(indexOut); |
| } |
| - blockOut.writeVLong(field.sumDocFreq); |
| - blockOut.writeVInt(field.docCount); |
| - blockOut.writeVInt(field.longsSize); |
| - blockOut.writeVLong(field.statsOut.getFilePointer()); |
| - blockOut.writeVLong(field.metaLongsOut.getFilePointer()); |
| - blockOut.writeVLong(field.metaBytesOut.getFilePointer()); |
| - |
| - field.skipOut.writeTo(blockOut); |
| - field.statsOut.writeTo(blockOut); |
| - field.metaLongsOut.writeTo(blockOut); |
| - field.metaBytesOut.writeTo(blockOut); |
| - field.dict.save(indexOut); |
| + writeTrailer(blockOut, blockDirStart); |
| + CodecUtil.writeFooter(indexOut); |
| + CodecUtil.writeFooter(blockOut); |
| + } catch (IOException ioe2) { |
| + ioe = ioe2; |
| + } finally { |
| + IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter); |
| + blockOut = null; |
| } |
| - writeTrailer(blockOut, blockDirStart); |
| - } catch (IOException ioe2) { |
| - ioe = ioe2; |
| - } finally { |
| - IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter); |
| } |
| } |
| |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (working copy) |
| @@ -59,7 +59,7 @@ |
| * FST-based terms dictionary reader. |
| * |
| * The FST directly maps each term and its metadata, |
| - * it is memeory resident. |
| + * it is memory resident. |
| * |
| * @lucene.experimental |
| */ |
| @@ -67,18 +67,21 @@ |
| public class FSTTermsReader extends FieldsProducer { |
| final TreeMap<String, TermsReader> fields = new TreeMap<>(); |
| final PostingsReaderBase postingsReader; |
| - final IndexInput in; |
| //static boolean TEST = false; |
| + final int version; |
| |
| public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { |
| final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION); |
| |
| this.postingsReader = postingsReader; |
| - this.in = state.directory.openInput(termsFileName, state.context); |
| + final IndexInput in = state.directory.openInput(termsFileName, state.context); |
| |
| boolean success = false; |
| try { |
| - readHeader(in); |
| + version = readHeader(in); |
| + if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(in); |
| + } |
| this.postingsReader.init(in); |
| seekDir(in); |
| |
| @@ -92,13 +95,15 @@ |
| long sumDocFreq = in.readVLong(); |
| int docCount = in.readVInt(); |
| int longsSize = in.readVInt(); |
| - TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); |
| + TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); |
| TermsReader previous = fields.put(fieldInfo.name, current); |
| - checkFieldSummary(state.segmentInfo, current, previous); |
| + checkFieldSummary(state.segmentInfo, in, current, previous); |
| } |
| success = true; |
| } finally { |
| - if (!success) { |
| + if (success) { |
| + IOUtils.close(in); |
| + } else { |
| IOUtils.closeWhileHandlingException(in); |
| } |
| } |
| @@ -110,10 +115,14 @@ |
| FSTTermsWriter.TERMS_VERSION_CURRENT); |
| } |
| private void seekDir(IndexInput in) throws IOException { |
| - in.seek(in.length() - 8); |
| + if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { |
| + in.seek(in.length() - CodecUtil.footerLength() - 8); |
| + } else { |
| + in.seek(in.length() - 8); |
| + } |
| in.seek(in.readLong()); |
| } |
| - private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException { |
| + private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException { |
| // #docs with field must be <= #docs |
| if (field.docCount < 0 || field.docCount > info.getDocCount()) { |
| throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")"); |
| @@ -150,7 +159,7 @@ |
| @Override |
| public void close() throws IOException { |
| try { |
| - IOUtils.close(in, postingsReader); |
| + IOUtils.close(postingsReader); |
| } finally { |
| fields.clear(); |
| } |
| @@ -165,7 +174,7 @@ |
| final int longsSize; |
| final FST<FSTTermOutputs.TermData> dict; |
| |
| - TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException { |
| + TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException { |
| this.fieldInfo = fieldInfo; |
| this.numTerms = numTerms; |
| this.sumTotalTermFreq = sumTotalTermFreq; |
| @@ -729,4 +738,9 @@ |
| } |
| return ramBytesUsed; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + postingsReader.validate(); |
| + } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (working copy) |
| @@ -124,11 +124,12 @@ |
| static final String TERMS_EXTENSION = "tmp"; |
| static final String TERMS_CODEC_NAME = "FST_TERMS_DICT"; |
| public static final int TERMS_VERSION_START = 0; |
| - public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START; |
| + public static final int TERMS_VERSION_CHECKSUM = 1; |
| + public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM; |
| |
| final PostingsWriterBase postingsWriter; |
| final FieldInfos fieldInfos; |
| - final IndexOutput out; |
| + IndexOutput out; |
| final int maxDoc; |
| final List<FieldMetaData> fields = new ArrayList<>(); |
| |
| @@ -199,28 +200,32 @@ |
| } |
| |
| public void close() throws IOException { |
| - IOException ioe = null; |
| - try { |
| - // write field summary |
| - final long dirStart = out.getFilePointer(); |
| - |
| - out.writeVInt(fields.size()); |
| - for (FieldMetaData field : fields) { |
| - out.writeVInt(field.fieldInfo.number); |
| - out.writeVLong(field.numTerms); |
| - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { |
| - out.writeVLong(field.sumTotalTermFreq); |
| + if (out != null) { |
| + IOException ioe = null; |
| + try { |
| + // write field summary |
| + final long dirStart = out.getFilePointer(); |
| + |
| + out.writeVInt(fields.size()); |
| + for (FieldMetaData field : fields) { |
| + out.writeVInt(field.fieldInfo.number); |
| + out.writeVLong(field.numTerms); |
| + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { |
| + out.writeVLong(field.sumTotalTermFreq); |
| + } |
| + out.writeVLong(field.sumDocFreq); |
| + out.writeVInt(field.docCount); |
| + out.writeVInt(field.longsSize); |
| + field.dict.save(out); |
| } |
| - out.writeVLong(field.sumDocFreq); |
| - out.writeVInt(field.docCount); |
| - out.writeVInt(field.longsSize); |
| - field.dict.save(out); |
| + writeTrailer(out, dirStart); |
| + CodecUtil.writeFooter(out); |
| + } catch (IOException ioe2) { |
| + ioe = ioe2; |
| + } finally { |
| + IOUtils.closeWhileHandlingException(ioe, out, postingsWriter); |
| + out = null; |
| } |
| - writeTrailer(out, dirStart); |
| - } catch (IOException ioe2) { |
| - ioe = ioe2; |
| - } finally { |
| - IOUtils.closeWhileHandlingException(ioe, out, postingsWriter); |
| } |
| } |
| |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java (working copy) |
| @@ -59,7 +59,7 @@ |
| * Writer for {@link MemoryDocValuesFormat} |
| */ |
| class MemoryDocValuesConsumer extends DocValuesConsumer { |
| - final IndexOutput data, meta; |
| + IndexOutput data, meta; |
| final int maxDoc; |
| final float acceptableOverheadRatio; |
| |
| @@ -208,7 +208,11 @@ |
| try { |
| if (meta != null) { |
| meta.writeVInt(-1); // write EOF marker |
| + CodecUtil.writeFooter(meta); // write checksum |
| } |
| + if (data != null) { |
| + CodecUtil.writeFooter(data); |
| + } |
| success = true; |
| } finally { |
| if (success) { |
| @@ -216,6 +220,7 @@ |
| } else { |
| IOUtils.closeWhileHandlingException(data, meta); |
| } |
| + data = meta = null; |
| } |
| } |
| |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (working copy) |
| @@ -37,6 +37,7 @@ |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -77,6 +78,7 @@ |
| |
| private final int maxDoc; |
| private final AtomicLong ramBytesUsed; |
| + private final int version; |
| |
| static final byte NUMBER = 0; |
| static final byte BYTES = 1; |
| @@ -91,15 +93,15 @@ |
| |
| static final int VERSION_START = 0; |
| static final int VERSION_GCD_COMPRESSION = 1; |
| - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; |
| + static final int VERSION_CHECKSUM = 2; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { |
| maxDoc = state.segmentInfo.getDocCount(); |
| String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); |
| // read in the entries from the metadata file. |
| - IndexInput in = state.directory.openInput(metaName, state.context); |
| + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); |
| boolean success = false; |
| - final int version; |
| try { |
| version = CodecUtil.checkHeader(in, metaCodec, |
| VERSION_START, |
| @@ -108,6 +110,11 @@ |
| binaries = new HashMap<>(); |
| fsts = new HashMap<>(); |
| readFields(in, state.fieldInfos); |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(in); |
| + } else { |
| + CodecUtil.checkEOF(in); |
| + } |
| ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); |
| success = true; |
| } finally { |
| @@ -208,6 +215,13 @@ |
| return ramBytesUsed.get(); |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(data); |
| + } |
| + } |
| + |
| private NumericDocValues loadNumeric(FieldInfo field) throws IOException { |
| NumericEntry entry = numerics.get(field.number); |
| data.seek(entry.offset + entry.missingBytes); |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (working copy) |
| @@ -25,6 +25,7 @@ |
| import java.util.SortedMap; |
| import java.util.TreeMap; |
| |
| +import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| @@ -41,6 +42,7 @@ |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| @@ -271,6 +273,9 @@ |
| } |
| |
| private static String EXTENSION = "ram"; |
| + private static final String CODEC_NAME = "MemoryPostings"; |
| + private static final int VERSION_START = 0; |
| + private static final int VERSION_CURRENT = VERSION_START; |
| |
| private class MemoryFieldsConsumer extends FieldsConsumer implements Closeable { |
| private final SegmentWriteState state; |
| @@ -279,6 +284,7 @@ |
| private MemoryFieldsConsumer(SegmentWriteState state) throws IOException { |
| final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); |
| out = state.directory.createOutput(fileName, state.context); |
| + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); |
| this.state = state; |
| } |
| |
| @@ -403,6 +409,7 @@ |
| // EOF marker: |
| try { |
| out.writeVInt(0); |
| + CodecUtil.writeFooter(out); |
| } finally { |
| out.close(); |
| } |
| @@ -951,7 +958,8 @@ |
| @Override |
| public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); |
| - final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE); |
| + final ChecksumIndexInput in = state.directory.openChecksumInput(fileName, IOContext.READONCE); |
| + CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); |
| |
| final SortedMap<String,TermsReader> fields = new TreeMap<>(); |
| |
| @@ -965,6 +973,7 @@ |
| // System.out.println("load field=" + termsReader.field.name); |
| fields.put(termsReader.field.name, termsReader); |
| } |
| + CodecUtil.checkFooter(in); |
| } finally { |
| in.close(); |
| } |
| @@ -1002,6 +1011,9 @@ |
| } |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| }; |
| } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (working copy) |
| @@ -653,4 +653,9 @@ |
| public long ramBytesUsed() { |
| return ((wrappedPostingsReader!=null) ? wrappedPostingsReader.ramBytesUsed(): 0); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + wrappedPostingsReader.validate(); |
| + } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (working copy) |
| @@ -706,4 +706,9 @@ |
| public long ramBytesUsed() { |
| return 0; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + // TODO: remove sep layout, it's fallen behind on features... |
| + } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java (working copy) |
| @@ -17,6 +17,7 @@ |
| * limitations under the License. |
| */ |
| |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM; |
| import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END; |
| import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD; |
| import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH; |
| @@ -47,6 +48,8 @@ |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| +import org.apache.lucene.store.BufferedChecksumIndexInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -467,4 +470,19 @@ |
| public long ramBytesUsed() { |
| return 0; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + BytesRef scratch = new BytesRef(); |
| + IndexInput clone = data.clone(); |
| + clone.seek(0); |
| + ChecksumIndexInput input = new BufferedChecksumIndexInput(clone); |
| + while(true) { |
| + SimpleTextUtil.readLine(input, scratch); |
| + if (scratch.equals(END)) { |
| + SimpleTextUtil.checkFooter(input, CHECKSUM); |
| + break; |
| + } |
| + } |
| + } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java (working copy) |
| @@ -36,6 +36,7 @@ |
| import org.apache.lucene.util.IOUtils; |
| |
| class SimpleTextDocValuesWriter extends DocValuesConsumer { |
| + final static BytesRef CHECKSUM = new BytesRef("checksum "); |
| final static BytesRef END = new BytesRef("END"); |
| final static BytesRef FIELD = new BytesRef("field "); |
| final static BytesRef TYPE = new BytesRef(" type "); |
| @@ -49,7 +50,7 @@ |
| final static BytesRef NUMVALUES = new BytesRef(" numvalues "); |
| final static BytesRef ORDPATTERN = new BytesRef(" ordpattern "); |
| |
| - final IndexOutput data; |
| + IndexOutput data; |
| final BytesRef scratch = new BytesRef(); |
| final int numDocs; |
| private final Set<String> fieldsSeen = new HashSet<>(); // for asserting |
| @@ -389,18 +390,25 @@ |
| |
| @Override |
| public void close() throws IOException { |
| - boolean success = false; |
| - try { |
| - assert !fieldsSeen.isEmpty(); |
| - // TODO: sheisty to do this here? |
| - SimpleTextUtil.write(data, END); |
| - SimpleTextUtil.writeNewline(data); |
| - success = true; |
| - } finally { |
| - if (success) { |
| - IOUtils.close(data); |
| - } else { |
| - IOUtils.closeWhileHandlingException(data); |
| + if (data != null) { |
| + boolean success = false; |
| + try { |
| + assert !fieldsSeen.isEmpty(); |
| + // TODO: sheisty to do this here? |
| + SimpleTextUtil.write(data, END); |
| + SimpleTextUtil.writeNewline(data); |
| + String checksum = Long.toString(data.getChecksum()); |
| + SimpleTextUtil.write(data, CHECKSUM); |
| + SimpleTextUtil.write(data, checksum, scratch); |
| + SimpleTextUtil.writeNewline(data); |
| + success = true; |
| + } finally { |
| + if (success) { |
| + IOUtils.close(data); |
| + } else { |
| + IOUtils.closeWhileHandlingException(data); |
| + } |
| + data = null; |
| } |
| } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (working copy) |
| @@ -23,15 +23,14 @@ |
| import java.util.Map; |
| |
| import org.apache.lucene.codecs.FieldInfosReader; |
| -import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfo.DocValuesType; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.FieldInfo.IndexOptions; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.StringHelper; |
| @@ -49,7 +48,7 @@ |
| @Override |
| public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException { |
| final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION); |
| - IndexInput input = directory.openInput(fileName, iocontext); |
| + ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext); |
| BytesRef scratch = new BytesRef(); |
| |
| boolean success = false; |
| @@ -129,9 +128,7 @@ |
| infos[i].setDocValuesGen(dvGen); |
| } |
| |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| + SimpleTextUtil.checkFooter(input, CHECKSUM); |
| |
| FieldInfos fieldInfos = new FieldInfos(infos); |
| success = true; |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (working copy) |
| @@ -58,6 +58,7 @@ |
| static final BytesRef NUM_ATTS = new BytesRef(" attributes "); |
| final static BytesRef ATT_KEY = new BytesRef(" key "); |
| final static BytesRef ATT_VALUE = new BytesRef(" value "); |
| + final static BytesRef CHECKSUM = new BytesRef("checksum "); |
| |
| @Override |
| public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { |
| @@ -132,6 +133,10 @@ |
| } |
| } |
| } |
| + String checksum = Long.toString(out.getChecksum()); |
| + SimpleTextUtil.write(out, CHECKSUM); |
| + SimpleTextUtil.write(out, checksum, scratch); |
| + SimpleTextUtil.writeNewline(out); |
| success = true; |
| } finally { |
| if (success) { |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (working copy) |
| @@ -33,6 +33,8 @@ |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.store.BufferedChecksumIndexInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.Bits; |
| @@ -50,6 +52,17 @@ |
| import org.apache.lucene.util.fst.PositiveIntOutputs; |
| import org.apache.lucene.util.fst.Util; |
| |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET; |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD; |
| + |
| class SimpleTextFieldsReader extends FieldsProducer { |
| private final TreeMap<String,Long> fields; |
| private final IndexInput in; |
| @@ -56,16 +69,6 @@ |
| private final FieldInfos fieldInfos; |
| private final int maxDoc; |
| |
| - final static BytesRef END = SimpleTextFieldsWriter.END; |
| - final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD; |
| - final static BytesRef TERM = SimpleTextFieldsWriter.TERM; |
| - final static BytesRef DOC = SimpleTextFieldsWriter.DOC; |
| - final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ; |
| - final static BytesRef POS = SimpleTextFieldsWriter.POS; |
| - final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET; |
| - final static BytesRef END_OFFSET = SimpleTextFieldsWriter.END_OFFSET; |
| - final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD; |
| - |
| public SimpleTextFieldsReader(SegmentReadState state) throws IOException { |
| this.maxDoc = state.segmentInfo.getDocCount(); |
| fieldInfos = state.fieldInfos; |
| @@ -82,16 +85,18 @@ |
| } |
| |
| private TreeMap<String,Long> readFields(IndexInput in) throws IOException { |
| + ChecksumIndexInput input = new BufferedChecksumIndexInput(in); |
| BytesRef scratch = new BytesRef(10); |
| TreeMap<String,Long> fields = new TreeMap<>(); |
| |
| while (true) { |
| - SimpleTextUtil.readLine(in, scratch); |
| + SimpleTextUtil.readLine(input, scratch); |
| if (scratch.equals(END)) { |
| + SimpleTextUtil.checkFooter(input, CHECKSUM); |
| return fields; |
| } else if (StringHelper.startsWith(scratch, FIELD)) { |
| String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"); |
| - fields.put(fieldName, in.getFilePointer()); |
| + fields.put(fieldName, input.getFilePointer()); |
| } |
| } |
| } |
| @@ -668,4 +673,7 @@ |
| } |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (working copy) |
| @@ -35,10 +35,11 @@ |
| |
| class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable { |
| |
| - private final IndexOutput out; |
| + private IndexOutput out; |
| private final BytesRef scratch = new BytesRef(10); |
| private final SegmentWriteState writeState; |
| |
| + final static BytesRef CHECKSUM = new BytesRef("checksum "); |
| final static BytesRef END = new BytesRef("END"); |
| final static BytesRef FIELD = new BytesRef("field "); |
| final static BytesRef TERM = new BytesRef(" term "); |
| @@ -215,11 +216,18 @@ |
| |
| @Override |
| public void close() throws IOException { |
| - try { |
| - write(END); |
| - newline(); |
| - } finally { |
| - out.close(); |
| + if (out != null) { |
| + try { |
| + write(END); |
| + newline(); |
| + String checksum = Long.toString(out.getChecksum()); |
| + write(CHECKSUM); |
| + write(checksum); |
| + newline(); |
| + } finally { |
| + out.close(); |
| + out = null; |
| + } |
| } |
| } |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java (working copy) |
| @@ -24,9 +24,9 @@ |
| import org.apache.lucene.codecs.LiveDocsFormat; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentCommitInfo; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.Bits; |
| @@ -50,6 +50,7 @@ |
| final static BytesRef SIZE = new BytesRef("size "); |
| final static BytesRef DOC = new BytesRef(" doc "); |
| final static BytesRef END = new BytesRef("END"); |
| + final static BytesRef CHECKSUM = new BytesRef("checksum "); |
| |
| @Override |
| public MutableBits newLiveDocs(int size) throws IOException { |
| @@ -69,10 +70,10 @@ |
| CharsRef scratchUTF16 = new CharsRef(); |
| |
| String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen()); |
| - IndexInput in = null; |
| + ChecksumIndexInput in = null; |
| boolean success = false; |
| try { |
| - in = dir.openInput(fileName, context); |
| + in = dir.openChecksumInput(fileName, context); |
| |
| SimpleTextUtil.readLine(in, scratch); |
| assert StringHelper.startsWith(scratch, SIZE); |
| @@ -88,6 +89,8 @@ |
| SimpleTextUtil.readLine(in, scratch); |
| } |
| |
| + SimpleTextUtil.checkFooter(in, CHECKSUM); |
| + |
| success = true; |
| return new SimpleTextBits(bits, size); |
| } finally { |
| @@ -127,6 +130,10 @@ |
| |
| SimpleTextUtil.write(out, END); |
| SimpleTextUtil.writeNewline(out); |
| + String checksum = Long.toString(out.getChecksum()); |
| + SimpleTextUtil.write(out, CHECKSUM); |
| + SimpleTextUtil.write(out, checksum, scratch); |
| + SimpleTextUtil.writeNewline(out); |
| success = true; |
| } finally { |
| if (success) { |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (working copy) |
| @@ -17,6 +17,7 @@ |
| * limitations under the License. |
| */ |
| |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM; |
| import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY; |
| import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE; |
| import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT; |
| @@ -35,9 +36,9 @@ |
| import org.apache.lucene.codecs.SegmentInfoReader; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.StringHelper; |
| @@ -54,7 +55,7 @@ |
| public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException { |
| BytesRef scratch = new BytesRef(); |
| String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); |
| - IndexInput input = directory.openInput(segFileName, context); |
| + ChecksumIndexInput input = directory.openChecksumInput(segFileName, context); |
| boolean success = false; |
| try { |
| SimpleTextUtil.readLine(input, scratch); |
| @@ -96,6 +97,8 @@ |
| String fileName = readString(SI_FILE.length, scratch); |
| files.add(fileName); |
| } |
| + |
| + SimpleTextUtil.checkFooter(input, SI_CHECKSUM); |
| |
| SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, |
| isCompoundFile, null, diagnostics); |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (working copy) |
| @@ -47,6 +47,7 @@ |
| final static BytesRef SI_DIAG_VALUE = new BytesRef(" value "); |
| final static BytesRef SI_NUM_FILES = new BytesRef(" files "); |
| final static BytesRef SI_FILE = new BytesRef(" file "); |
| + final static BytesRef SI_CHECKSUM = new BytesRef(" checksum "); |
| |
| @Override |
| public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException { |
| @@ -55,7 +56,7 @@ |
| si.addFile(segFileName); |
| |
| boolean success = false; |
| - IndexOutput output = dir.createOutput(segFileName, ioContext); |
| + IndexOutput output = dir.createOutput(segFileName, ioContext); |
| |
| try { |
| BytesRef scratch = new BytesRef(); |
| @@ -103,6 +104,11 @@ |
| SimpleTextUtil.writeNewline(output); |
| } |
| } |
| + |
| + String checksum = Long.toString(output.getChecksum()); |
| + SimpleTextUtil.write(output, SI_CHECKSUM); |
| + SimpleTextUtil.write(output, checksum, scratch); |
| + SimpleTextUtil.writeNewline(output); |
| success = true; |
| } finally { |
| if (!success) { |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (working copy) |
| @@ -26,6 +26,8 @@ |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.index.StoredFieldVisitor; |
| import org.apache.lucene.store.AlreadyClosedException; |
| +import org.apache.lucene.store.BufferedChecksumIndexInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| @@ -78,15 +80,17 @@ |
| // stored fields file in entirety up-front and save the offsets |
| // so we can seek to the documents later. |
| private void readIndex(int size) throws IOException { |
| + ChecksumIndexInput input = new BufferedChecksumIndexInput(in); |
| offsets = new long[size]; |
| int upto = 0; |
| while (!scratch.equals(END)) { |
| - readLine(); |
| + SimpleTextUtil.readLine(input, scratch); |
| if (StringHelper.startsWith(scratch, DOC)) { |
| - offsets[upto] = in.getFilePointer(); |
| + offsets[upto] = input.getFilePointer(); |
| upto++; |
| } |
| } |
| + SimpleTextUtil.checkFooter(input, CHECKSUM); |
| assert upto == offsets.length; |
| } |
| |
| @@ -188,6 +192,11 @@ |
| return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); |
| } |
| |
| + private String readString(int offset, BytesRef scratch) { |
| + UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16); |
| + return scratchUTF16.toString(); |
| + } |
| + |
| private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) { |
| return a.length == b.length - bOffset && |
| ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset); |
| @@ -197,4 +206,7 @@ |
| public long ramBytesUsed() { |
| return 0; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java (working copy) |
| @@ -51,13 +51,14 @@ |
| final static BytesRef TYPE_FLOAT = new BytesRef("float"); |
| final static BytesRef TYPE_DOUBLE = new BytesRef("double"); |
| |
| - final static BytesRef END = new BytesRef("END"); |
| - final static BytesRef DOC = new BytesRef("doc "); |
| - final static BytesRef NUM = new BytesRef(" numfields "); |
| - final static BytesRef FIELD = new BytesRef(" field "); |
| - final static BytesRef NAME = new BytesRef(" name "); |
| - final static BytesRef TYPE = new BytesRef(" type "); |
| - final static BytesRef VALUE = new BytesRef(" value "); |
| + final static BytesRef CHECKSUM = new BytesRef("checksum "); |
| + final static BytesRef END = new BytesRef("END"); |
| + final static BytesRef DOC = new BytesRef("doc "); |
| + final static BytesRef NUM = new BytesRef(" numfields "); |
| + final static BytesRef FIELD = new BytesRef(" field "); |
| + final static BytesRef NAME = new BytesRef(" name "); |
| + final static BytesRef TYPE = new BytesRef(" type "); |
| + final static BytesRef VALUE = new BytesRef(" value "); |
| |
| private final BytesRef scratch = new BytesRef(); |
| |
| @@ -171,6 +172,10 @@ |
| } |
| write(END); |
| newLine(); |
| + String checksum = Long.toString(out.getChecksum()); |
| + write(CHECKSUM); |
| + write(checksum); |
| + newLine(); |
| } |
| |
| @Override |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (working copy) |
| @@ -33,6 +33,8 @@ |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.store.AlreadyClosedException; |
| +import org.apache.lucene.store.BufferedChecksumIndexInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| @@ -82,15 +84,17 @@ |
| // vectors file in entirety up-front and save the offsets |
| // so we can seek to the data later. |
| private void readIndex(int maxDoc) throws IOException { |
| + ChecksumIndexInput input = new BufferedChecksumIndexInput(in); |
| offsets = new long[maxDoc]; |
| int upto = 0; |
| while (!scratch.equals(END)) { |
| - readLine(); |
| + SimpleTextUtil.readLine(input, scratch); |
| if (StringHelper.startsWith(scratch, DOC)) { |
| - offsets[upto] = in.getFilePointer(); |
| + offsets[upto] = input.getFilePointer(); |
| upto++; |
| } |
| } |
| + SimpleTextUtil.checkFooter(input, CHECKSUM); |
| assert upto == offsets.length; |
| } |
| |
| @@ -537,4 +541,7 @@ |
| public long ramBytesUsed() { |
| return 0; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| } |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (working copy) |
| @@ -37,6 +37,7 @@ |
| */ |
| public class SimpleTextTermVectorsWriter extends TermVectorsWriter { |
| |
| + static final BytesRef CHECKSUM = new BytesRef("checksum "); |
| static final BytesRef END = new BytesRef("END"); |
| static final BytesRef DOC = new BytesRef("doc "); |
| static final BytesRef NUMFIELDS = new BytesRef(" numfields "); |
| @@ -177,6 +178,10 @@ |
| } |
| write(END); |
| newLine(); |
| + String checksum = Long.toString(out.getChecksum()); |
| + write(CHECKSUM); |
| + write(checksum); |
| + newLine(); |
| } |
| |
| @Override |
| Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java |
| =================================================================== |
| --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java (revision 1583220) |
| +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java (working copy) |
| @@ -17,11 +17,16 @@ |
| * limitations under the License. |
| */ |
| |
| +import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM; |
| + |
| import java.io.IOException; |
| |
| +import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.UnicodeUtil; |
| |
| class SimpleTextUtil { |
| @@ -67,4 +72,18 @@ |
| scratch.offset = 0; |
| scratch.length = upto; |
| } |
| + |
| + public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException { |
| + BytesRef scratch = new BytesRef(); |
| + String expectedChecksum = Long.toString(input.getChecksum()); |
| + SimpleTextUtil.readLine(input, scratch); |
| + assert StringHelper.startsWith(scratch, prefix); |
| + String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString(); |
| + if (!expectedChecksum.equals(actualChecksum)) { |
| + throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")"); |
| + } |
| + if (input.length() != input.getFilePointer()) { |
| + throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")"); |
| + } |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (working copy) |
| @@ -131,6 +131,11 @@ |
| if (indexVersion != version) { |
| throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion); |
| } |
| + |
| + // verify |
| + if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(indexIn); |
| + } |
| |
| // Have PostingsReader init itself |
| postingsReader.init(in); |
| @@ -157,7 +162,7 @@ |
| final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong(); |
| final long sumDocFreq = in.readVLong(); |
| final int docCount = in.readVInt(); |
| - final int longsSize = version >= BlockTreeTermsWriter.TERMS_VERSION_META_ARRAY ? in.readVInt() : 0; |
| + final int longsSize = version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0; |
| if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs |
| throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")"); |
| } |
| @@ -187,9 +192,9 @@ |
| /** Reads terms file header. */ |
| private int readHeader(IndexInput input) throws IOException { |
| int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME, |
| - BlockTreeTermsWriter.TERMS_VERSION_START, |
| - BlockTreeTermsWriter.TERMS_VERSION_CURRENT); |
| - if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) { |
| + BlockTreeTermsWriter.VERSION_START, |
| + BlockTreeTermsWriter.VERSION_CURRENT); |
| + if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) { |
| dirOffset = input.readLong(); |
| } |
| return version; |
| @@ -198,9 +203,9 @@ |
| /** Reads index file header. */ |
| private int readIndexHeader(IndexInput input) throws IOException { |
| int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME, |
| - BlockTreeTermsWriter.TERMS_INDEX_VERSION_START, |
| - BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT); |
| - if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) { |
| + BlockTreeTermsWriter.VERSION_START, |
| + BlockTreeTermsWriter.VERSION_CURRENT); |
| + if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) { |
| indexDirOffset = input.readLong(); |
| } |
| return version; |
| @@ -209,7 +214,10 @@ |
| /** Seek {@code input} to the directory offset. */ |
| private void seekDir(IndexInput input, long dirOffset) |
| throws IOException { |
| - if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) { |
| + if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { |
| + input.seek(input.length() - CodecUtil.footerLength() - 8); |
| + dirOffset = input.readLong(); |
| + } else if (version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY) { |
| input.seek(input.length() - 8); |
| dirOffset = input.readLong(); |
| } |
| @@ -2977,4 +2985,15 @@ |
| } |
| return sizeInByes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { |
| + // term dictionary |
| + CodecUtil.checksumEntireFile(in); |
| + |
| + // postings |
| + postingsReader.validate(); |
| + } |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (working copy) |
| @@ -109,7 +109,7 @@ |
| * |
| * <ul> |
| * <li>TermsDict (.tim) --> Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>, |
| - * FieldSummary, DirOffset</li> |
| + * FieldSummary, DirOffset, Footer</li> |
| * <li>NodeBlock --> (OuterNode | InnerNode)</li> |
| * <li>OuterNode --> EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata</i>><sup>EntryCount</sup></li> |
| * <li>InnerNode --> EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ? ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata ? </i>><sup>EntryCount</sup></li> |
| @@ -122,6 +122,7 @@ |
| * FieldNumber,RootCodeLength,DocCount --> {@link DataOutput#writeVInt VInt}</li> |
| * <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --> |
| * {@link DataOutput#writeVLong VLong}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| @@ -150,12 +151,13 @@ |
| * when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p> |
| * <ul> |
| * <li>TermsIndex (.tip) --> Header, FSTIndex<sup>NumFields</sup> |
| - * <IndexStartFP><sup>NumFields</sup>, DirOffset</li> |
| + * <IndexStartFP><sup>NumFields</sup>, DirOffset, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li> |
| * <li>IndexStartFP --> {@link DataOutput#writeVLong VLong}</li> |
| * <!-- TODO: better describe FST output here --> |
| * <li>FSTIndex --> {@link FST FST<byte[]>}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| @@ -178,7 +180,6 @@ |
| * @see BlockTreeTermsReader |
| * @lucene.experimental |
| */ |
| - |
| public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable { |
| |
| /** Suggested default value for the {@code |
| @@ -204,33 +205,24 @@ |
| final static String TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT"; |
| |
| /** Initial terms format. */ |
| - public static final int TERMS_VERSION_START = 0; |
| + public static final int VERSION_START = 0; |
| |
| /** Append-only */ |
| - public static final int TERMS_VERSION_APPEND_ONLY = 1; |
| + public static final int VERSION_APPEND_ONLY = 1; |
| |
| /** Meta data as array */ |
| - public static final int TERMS_VERSION_META_ARRAY = 2; |
| + public static final int VERSION_META_ARRAY = 2; |
| + |
| + /** checksums */ |
| + public static final int VERSION_CHECKSUM = 3; |
| |
| /** Current terms format. */ |
| - public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_META_ARRAY; |
| + public static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| /** Extension of terms index file */ |
| static final String TERMS_INDEX_EXTENSION = "tip"; |
| final static String TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX"; |
| |
| - /** Initial index format. */ |
| - public static final int TERMS_INDEX_VERSION_START = 0; |
| - |
| - /** Append-only */ |
| - public static final int TERMS_INDEX_VERSION_APPEND_ONLY = 1; |
| - |
| - /** Meta data as array */ |
| - public static final int TERMS_INDEX_VERSION_META_ARRAY = 2; |
| - |
| - /** Current index format. */ |
| - public static final int TERMS_INDEX_VERSION_CURRENT = TERMS_INDEX_VERSION_META_ARRAY; |
| - |
| private final IndexOutput out; |
| private final IndexOutput indexOut; |
| final int maxDoc; |
| @@ -326,12 +318,12 @@ |
| |
| /** Writes the terms file header. */ |
| private void writeHeader(IndexOutput out) throws IOException { |
| - CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT); |
| + CodecUtil.writeHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT); |
| } |
| |
| /** Writes the index file header. */ |
| private void writeIndexHeader(IndexOutput out) throws IOException { |
| - CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT); |
| + CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT); |
| } |
| |
| /** Writes the terms file trailer. */ |
| @@ -1139,13 +1131,13 @@ |
| } |
| out.writeVLong(field.sumDocFreq); |
| out.writeVInt(field.docCount); |
| - if (TERMS_VERSION_CURRENT >= TERMS_VERSION_META_ARRAY) { |
| - out.writeVInt(field.longsSize); |
| - } |
| + out.writeVInt(field.longsSize); |
| indexOut.writeVLong(field.indexStartFP); |
| } |
| writeTrailer(out, dirStart); |
| + CodecUtil.writeFooter(out); |
| writeIndexTrailer(indexOut, indexDirStart); |
| + CodecUtil.writeFooter(indexOut); |
| } catch (IOException ioe2) { |
| ioe = ioe2; |
| } finally { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java (working copy) |
| @@ -23,8 +23,12 @@ |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.IndexFormatTooNewException; |
| import org.apache.lucene.index.IndexFormatTooOldException; |
| +import org.apache.lucene.store.BufferedChecksumIndexInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** |
| @@ -43,6 +47,10 @@ |
| * Constant to identify the start of a codec header. |
| */ |
| public final static int CODEC_MAGIC = 0x3fd76c17; |
| + /** |
| + * Constant to identify the start of a codec footer. |
| + */ |
| + public final static int FOOTER_MAGIC = ~CODEC_MAGIC; |
| |
| /** |
| * Writes a codec header, which records both a string to |
| @@ -150,4 +158,119 @@ |
| |
| return actualVersion; |
| } |
| + |
| + /** |
| + * Writes a codec footer, which records both a checksum |
| + * algorithm ID and a checksum. This footer can |
| + * be parsed and validated with |
| + * {@link #checkFooter(ChecksumIndexInput) checkFooter()}. |
| + * <p> |
| + * CodecFooter --> Magic,AlgorithmID,Checksum |
| + * <ul> |
| + * <li>Magic --> {@link DataOutput#writeInt Uint32}. This |
| + * identifies the start of the footer. It is always {@value #FOOTER_MAGIC}. |
| + * <li>AlgorithmID --> {@link DataOutput#writeInt Uint32}. This |
| + * indicates the checksum algorithm used. Currently this is always 0, |
| + * for zlib-crc32. |
| + * <li>Checksum --> {@link DataOutput#writeLong Int64}. The |
| + * actual checksum value for all previous bytes in the stream, including |
| + * the bytes from Magic and AlgorithmID. |
| + * </ul> |
| + * |
| + * @param out Output stream |
| + * @throws IOException If there is an I/O error writing to the underlying medium. |
| + */ |
| + public static void writeFooter(IndexOutput out) throws IOException { |
| + out.writeInt(FOOTER_MAGIC); |
| + out.writeInt(0); |
| + out.writeLong(out.getChecksum()); |
| + } |
| + |
| + /** |
| + * Computes the length of a codec footer. |
| + * |
| + * @return length of the entire codec footer. |
| + * @see #writeFooter(IndexOutput) |
| + */ |
| + public static int footerLength() { |
| + return 16; |
| + } |
| + |
| + /** |
| + * Validates the codec footer previously written by {@link #writeFooter}. |
| + * @return actual checksum value |
| + * @throws IOException if the footer is invalid, if the checksum does not match, |
| + * or if {@code in} is not properly positioned before the footer |
| + * at the end of the stream. |
| + */ |
| + public static long checkFooter(ChecksumIndexInput in) throws IOException { |
| + validateFooter(in); |
| + long actualChecksum = in.getChecksum(); |
| + long expectedChecksum = in.readLong(); |
| + if (expectedChecksum != actualChecksum) { |
| + throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(expectedChecksum) + |
| + " actual=" + Long.toHexString(actualChecksum) + |
| + " (resource=" + in + ")"); |
| + } |
| + if (in.getFilePointer() != in.length()) { |
| + throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); |
| + } |
| + return actualChecksum; |
| + } |
| + |
| + /** |
| + * Returns (but does not validate) the checksum previously written by {@link #writeFooter}. |
| + * @return actual checksum value |
| + * @throws IOException if the footer is invalid |
| + */ |
| + public static long retrieveChecksum(IndexInput in) throws IOException { |
| + in.seek(in.length() - footerLength()); |
| + validateFooter(in); |
| + return in.readLong(); |
| + } |
| + |
| + private static void validateFooter(IndexInput in) throws IOException { |
| + final int magic = in.readInt(); |
| + if (magic != FOOTER_MAGIC) { |
| + throw new CorruptIndexException("codec footer mismatch: actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC + " (resource: " + in + ")"); |
| + } |
| + |
| + final int algorithmID = in.readInt(); |
| + if (algorithmID != 0) { |
| + throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID); |
| + } |
| + } |
| + |
| + /** |
| + * Checks that the stream is positioned at the end, and throws exception |
| + * if it is not. |
| + * @deprecated Use {@link #checkFooter} instead, this should only be used for files without checksums |
| + */ |
| + @Deprecated |
| + public static void checkEOF(IndexInput in) throws IOException { |
| + if (in.getFilePointer() != in.length()) { |
| + throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); |
| + } |
| + } |
| + |
| + /** |
| + * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter} |
| + * <p> |
| + * Note that this method may be slow, as it must process the entire file. |
| + * If you just need to extract the checksum value, call {@link #retrieveChecksum}. |
| + */ |
| + public static long checksumEntireFile(IndexInput input) throws IOException { |
| + IndexInput clone = input.clone(); |
| + clone.seek(0); |
| + ChecksumIndexInput in = new BufferedChecksumIndexInput(clone); |
| + assert in.getFilePointer() == 0; |
| + final byte[] buffer = new byte[1024]; |
| + long bytesToRead = in.length() - footerLength(); |
| + for (long skipped = 0; skipped < bytesToRead; ) { |
| + final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length); |
| + in.readBytes(buffer, 0, toRead); |
| + skipped += toRead; |
| + } |
| + return checkFooter(in); |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java (working copy) |
| @@ -68,6 +68,14 @@ |
| public abstract long ramBytesUsed(); |
| |
| /** |
| + * Validates/runs consistency checks on this producer. |
| + * <p> |
| + * Note that this may be costly in terms of I/O, e.g. |
| + * may involve computing a checksum value against large data files. |
| + */ |
| + public abstract void validate() throws IOException; |
| + |
| + /** |
| * A simple implementation of {@link DocValuesProducer#getDocsWithField} that |
| * returns {@code true} if a document has an ordinal >= 0 |
| * <p> |
| Index: lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java (working copy) |
| @@ -39,4 +39,12 @@ |
| |
| /** Returns approximate RAM bytes used */ |
| public abstract long ramBytesUsed(); |
| + |
| + /** |
| + * Validates/runs consistency checks on this reader. |
| + * <p> |
| + * Note that this may be costly in terms of I/O, e.g. |
| + * may involve computing a checksum value against large data files. |
| + */ |
| + public abstract void validate() throws IOException; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (working copy) |
| @@ -72,6 +72,14 @@ |
| /** Returns approximate RAM bytes used */ |
| public abstract long ramBytesUsed(); |
| |
| + /** |
| + * Validates/runs consistency checks on this reader. |
| + * <p> |
| + * Note that this may be costly in terms of I/O, e.g. |
| + * may involve computing a checksum value against large data files. |
| + */ |
| + public abstract void validate() throws IOException; |
| + |
| @Override |
| public abstract void close() throws IOException; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (working copy) |
| @@ -43,4 +43,12 @@ |
| |
| /** Returns approximate RAM bytes used */ |
| public abstract long ramBytesUsed(); |
| + |
| + /** |
| + * Validates/runs consistency checks on this reader. |
| + * <p> |
| + * Note that this may be costly in terms of I/O, e.g. |
| + * may involve computing a checksum value against large data files. |
| + */ |
| + public abstract void validate() throws IOException; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java (working copy) |
| @@ -45,6 +45,14 @@ |
| /** Returns approximate RAM bytes used */ |
| public abstract long ramBytesUsed(); |
| |
| + /** |
| + * Validates/runs consistency checks on this reader. |
| + * <p> |
| + * Note that this may be costly in terms of I/O, e.g. |
| + * may involve computing a checksum value against large data files. |
| + */ |
| + public abstract void validate() throws IOException; |
| + |
| /** Create a clone that one caller at a time may use to |
| * read term vectors. */ |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java (working copy) |
| @@ -21,6 +21,7 @@ |
| import java.io.IOException; |
| |
| import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.packed.PackedInts; |
| @@ -52,6 +53,7 @@ |
| * <li>AvgChunkSize --> the average size of a chunk of compressed documents, as a {@link DataOutput#writeVLong VLong}</li> |
| * <li>BitsPerStartPointerDelta --> number of bits required to represent a delta from the average using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li> |
| * <li>StartPointerDeltas --> {@link PackedInts packed} array of BlockChunks elements of BitsPerStartPointerDelta bits each, representing the deltas from the average start pointer using <a href="https://developers.google.com/protocol-buffers/docs/encoding#types">ZigZag encoding</a></li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes</p> |
| * <ul> |
| @@ -198,6 +200,7 @@ |
| writeBlock(); |
| } |
| fieldsIndexOut.writeVInt(0); // end marker |
| + CodecUtil.writeFooter(fieldsIndexOut); |
| } |
| |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (working copy) |
| @@ -28,6 +28,7 @@ |
| import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS; |
| import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK; |
| import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS; |
| +import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM; |
| import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT; |
| import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START; |
| import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION; |
| @@ -47,6 +48,7 @@ |
| import org.apache.lucene.index.StoredFieldVisitor; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.Directory; |
| @@ -113,17 +115,20 @@ |
| boolean success = false; |
| fieldInfos = fn; |
| numDocs = si.getDocCount(); |
| - IndexInput indexStream = null; |
| + ChecksumIndexInput indexStream = null; |
| try { |
| // Load the index into memory |
| final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION); |
| - indexStream = d.openInput(indexStreamFN, context); |
| + indexStream = d.openChecksumInput(indexStreamFN, context); |
| final String codecNameIdx = formatName + CODEC_SFX_IDX; |
| version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); |
| assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); |
| indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); |
| - if (indexStream.getFilePointer() != indexStream.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")"); |
| + |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(indexStream); |
| + } else { |
| + CodecUtil.checkEOF(indexStream); |
| } |
| indexStream.close(); |
| indexStream = null; |
| @@ -509,4 +514,11 @@ |
| return indexReader.ramBytesUsed(); |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(fieldsStream); |
| + } |
| + } |
| + |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (working copy) |
| @@ -71,7 +71,8 @@ |
| static final String CODEC_SFX_DAT = "Data"; |
| static final int VERSION_START = 0; |
| static final int VERSION_BIG_CHUNKS = 1; |
| - static final int VERSION_CURRENT = VERSION_BIG_CHUNKS; |
| + static final int VERSION_CHECKSUM = 2; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| private final Directory directory; |
| private final String segment; |
| @@ -106,9 +107,11 @@ |
| this.numBufferedDocs = 0; |
| |
| boolean success = false; |
| - IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context); |
| + IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), |
| + context); |
| try { |
| - fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context); |
| + fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), |
| + context); |
| |
| final String codecNameIdx = formatName + CODEC_SFX_IDX; |
| final String codecNameDat = formatName + CODEC_SFX_DAT; |
| @@ -314,6 +317,7 @@ |
| throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs); |
| } |
| indexWriter.finish(numDocs); |
| + CodecUtil.writeFooter(fieldsStream); |
| assert bufferedDocs.length == 0; |
| } |
| |
| Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (working copy) |
| @@ -28,6 +28,7 @@ |
| import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION; |
| import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT; |
| import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START; |
| +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM; |
| |
| import java.io.Closeable; |
| import java.io.IOException; |
| @@ -48,6 +49,7 @@ |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| @@ -69,6 +71,7 @@ |
| private final FieldInfos fieldInfos; |
| final CompressingStoredFieldsIndexReader indexReader; |
| final IndexInput vectorsStream; |
| + private final int version; |
| private final int packedIntsVersion; |
| private final CompressionMode compressionMode; |
| private final Decompressor decompressor; |
| @@ -88,6 +91,7 @@ |
| this.chunkSize = reader.chunkSize; |
| this.numDocs = reader.numDocs; |
| this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0); |
| + this.version = reader.version; |
| this.closed = false; |
| } |
| |
| @@ -99,17 +103,20 @@ |
| boolean success = false; |
| fieldInfos = fn; |
| numDocs = si.getDocCount(); |
| - IndexInput indexStream = null; |
| + ChecksumIndexInput indexStream = null; |
| try { |
| // Load the index into memory |
| final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION); |
| - indexStream = d.openInput(indexStreamFN, context); |
| + indexStream = d.openChecksumInput(indexStreamFN, context); |
| final String codecNameIdx = formatName + CODEC_SFX_IDX; |
| - int version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); |
| + version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); |
| assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); |
| indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); |
| - if (indexStream.getFilePointer() != indexStream.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")"); |
| + |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(indexStream); |
| + } else { |
| + CodecUtil.checkEOF(indexStream); |
| } |
| indexStream.close(); |
| indexStream = null; |
| @@ -1045,5 +1052,12 @@ |
| public long ramBytesUsed() { |
| return indexReader.ramBytesUsed(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(vectorsStream); |
| + } |
| + } |
| |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (working copy) |
| @@ -66,7 +66,8 @@ |
| static final String CODEC_SFX_DAT = "Data"; |
| |
| static final int VERSION_START = 0; |
| - static final int VERSION_CURRENT = VERSION_START; |
| + static final int VERSION_CHECKSUM = 1; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| static final int BLOCK_SIZE = 64; |
| |
| @@ -220,9 +221,11 @@ |
| lastTerm = new BytesRef(ArrayUtil.oversize(30, 1)); |
| |
| boolean success = false; |
| - IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context); |
| + IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), |
| + context); |
| try { |
| - vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context); |
| + vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), |
| + context); |
| |
| final String codecNameIdx = formatName + CODEC_SFX_IDX; |
| final String codecNameDat = formatName + CODEC_SFX_DAT; |
| @@ -659,6 +662,7 @@ |
| throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs); |
| } |
| indexWriter.finish(numDocs); |
| + CodecUtil.writeFooter(vectorsStream); |
| } |
| |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java (working copy) |
| @@ -21,6 +21,8 @@ |
| import java.util.Arrays; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| +import org.apache.lucene.index.IndexFormatTooOldException; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.CompoundFileDirectory; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| @@ -198,9 +200,12 @@ |
| // Changed DGaps to encode gaps between cleared bits, not |
| // set: |
| public final static int VERSION_DGAPS_CLEARED = 1; |
| + |
| + // added checksum |
| + public final static int VERSION_CHECKSUM = 2; |
| |
| // Increment version to change it: |
| - public final static int VERSION_CURRENT = VERSION_DGAPS_CLEARED; |
| + public final static int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| public int getVersion() { |
| return version; |
| @@ -221,6 +226,7 @@ |
| } else { |
| writeBits(output); |
| } |
| + CodecUtil.writeFooter(output); |
| assert verifyCount(); |
| } finally { |
| IOUtils.close(output); |
| @@ -324,7 +330,7 @@ |
| <code>d</code>, as written by the {@link #write} method. |
| */ |
| public BitVector(Directory d, String name, IOContext context) throws IOException { |
| - IndexInput input = d.openInput(name, context); |
| + ChecksumIndexInput input = d.openChecksumInput(name, context); |
| |
| try { |
| final int firstInt = input.readInt(); |
| @@ -334,8 +340,8 @@ |
| version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_CURRENT); |
| size = input.readInt(); |
| } else { |
| - version = VERSION_PRE; |
| - size = firstInt; |
| + // we started writing full header well before 4.0 |
| + throw new IndexFormatTooOldException(input.toString(), Integer.toString(firstInt)); |
| } |
| if (size == -1) { |
| if (version >= VERSION_DGAPS_CLEARED) { |
| @@ -351,6 +357,11 @@ |
| invertAll(); |
| } |
| |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(input); |
| + } else { |
| + CodecUtil.checkEOF(input); |
| + } |
| assert verifyCount(); |
| } finally { |
| input.close(); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java (working copy) |
| @@ -105,9 +105,7 @@ |
| default: |
| throw new AssertionError(); |
| } |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| + CodecUtil.checkEOF(input); |
| success = true; |
| } finally { |
| if (success) { |
| @@ -327,9 +325,7 @@ |
| PagedBytes bytes = new PagedBytes(16); |
| bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount()); |
| final PagedBytes.Reader bytesReader = bytes.freeze(true); |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| + CodecUtil.checkEOF(input); |
| success = true; |
| ramBytesUsed.addAndGet(bytes.ramBytesUsed()); |
| return new BinaryDocValues() { |
| @@ -367,12 +363,8 @@ |
| bytes.copy(data, totalBytes); |
| final PagedBytes.Reader bytesReader = bytes.freeze(true); |
| final PackedInts.Reader reader = PackedInts.getReader(index); |
| - if (data.getFilePointer() != data.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); |
| - } |
| - if (index.getFilePointer() != index.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); |
| - } |
| + CodecUtil.checkEOF(data); |
| + CodecUtil.checkEOF(index); |
| success = true; |
| ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed()); |
| return new BinaryDocValues() { |
| @@ -414,12 +406,8 @@ |
| bytes.copy(data, fixedLength * (long) valueCount); |
| final PagedBytes.Reader bytesReader = bytes.freeze(true); |
| final PackedInts.Reader reader = PackedInts.getReader(index); |
| - if (data.getFilePointer() != data.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); |
| - } |
| - if (index.getFilePointer() != index.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); |
| - } |
| + CodecUtil.checkEOF(data); |
| + CodecUtil.checkEOF(index); |
| ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed()); |
| success = true; |
| return new BinaryDocValues() { |
| @@ -459,12 +447,8 @@ |
| bytes.copy(data, totalBytes); |
| final PagedBytes.Reader bytesReader = bytes.freeze(true); |
| final PackedInts.Reader reader = PackedInts.getReader(index); |
| - if (data.getFilePointer() != data.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); |
| - } |
| - if (index.getFilePointer() != index.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); |
| - } |
| + CodecUtil.checkEOF(data); |
| + CodecUtil.checkEOF(index); |
| ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed()); |
| success = true; |
| return new BinaryDocValues() { |
| @@ -515,12 +499,8 @@ |
| default: |
| throw new AssertionError(); |
| } |
| - if (data.getFilePointer() != data.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); |
| - } |
| - if (index.getFilePointer() != index.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); |
| - } |
| + CodecUtil.checkEOF(data); |
| + CodecUtil.checkEOF(index); |
| success = true; |
| } finally { |
| if (success) { |
| @@ -654,4 +634,8 @@ |
| public long ramBytesUsed() { |
| return ramBytesUsed.get(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (working copy) |
| @@ -107,9 +107,7 @@ |
| omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes)); |
| } |
| |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| + CodecUtil.checkEOF(input); |
| FieldInfos fieldInfos = new FieldInfos(infos); |
| success = true; |
| return fieldInfos; |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (working copy) |
| @@ -1168,4 +1168,7 @@ |
| return 0; |
| } |
| |
| + @Override |
| + public void validate() throws IOException {} |
| + |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (working copy) |
| @@ -64,9 +64,7 @@ |
| input.readStringStringMap(); // read deprecated attributes |
| final Set<String> files = input.readStringSet(); |
| |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| + CodecUtil.checkEOF(input); |
| |
| final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics); |
| si.setFiles(files); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (working copy) |
| @@ -249,4 +249,7 @@ |
| public long ramBytesUsed() { |
| return 0; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (working copy) |
| @@ -760,5 +760,8 @@ |
| public long ramBytesUsed() { |
| return 0; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| } |
| |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (working copy) |
| @@ -132,6 +132,7 @@ |
| * <li>Header, --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}</li> |
| * <li>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| @@ -190,7 +191,7 @@ |
| * each packed or VInt block, when the length of document list is larger than packed block size.</p> |
| * |
| * <ul> |
| - * <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup></li> |
| + * <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>, |
| * VIntBlock? </li> |
| @@ -206,6 +207,7 @@ |
| * --> |
| * {@link DataOutput#writeVInt VInt}</li> |
| * <li>SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| @@ -273,7 +275,7 @@ |
| * <p>The .pos file contains the lists of positions that each term occurs at within documents. It also |
| * sometimes stores part of payloads and offsets for speedup.</p> |
| * <ul> |
| - * <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup></li> |
| + * <li>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>, |
| * VIntBlock? </li> |
| @@ -283,6 +285,7 @@ |
| * <li>PositionDelta, OffsetDelta, OffsetLength --> |
| * {@link DataOutput#writeVInt VInt}</li> |
| * <li>PayloadData --> {@link DataOutput#writeByte byte}<sup>PayLength</sup></li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| @@ -325,7 +328,7 @@ |
| * <p>The .pay file will store payloads and offsets associated with certain term-document positions. |
| * Some payloads and offsets will be separated out into .pos file, for performance reasons.</p> |
| * <ul> |
| - * <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup></li> |
| + * <li>PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup> |
| * <li>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup> |
| @@ -332,6 +335,7 @@ |
| * <li>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}</li> |
| * <li>SumPayLength --> {@link DataOutput#writeVInt VInt}</li> |
| * <li>PayData --> {@link DataOutput#writeByte byte}<sup>SumPayLength</sup></li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (working copy) |
| @@ -35,7 +35,6 @@ |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| -import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| @@ -1547,4 +1546,18 @@ |
| return 0; |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) { |
| + if (docIn != null) { |
| + CodecUtil.checksumEntireFile(docIn); |
| + } |
| + if (posIn != null) { |
| + CodecUtil.checksumEntireFile(posIn); |
| + } |
| + if (payIn != null) { |
| + CodecUtil.checksumEntireFile(payIn); |
| + } |
| + } |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java (working copy) |
| @@ -64,11 +64,12 @@ |
| // Increment version to change it |
| final static int VERSION_START = 0; |
| final static int VERSION_META_ARRAY = 1; |
| - final static int VERSION_CURRENT = VERSION_META_ARRAY; |
| + final static int VERSION_CHECKSUM = 2; |
| + final static int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| - final IndexOutput docOut; |
| - final IndexOutput posOut; |
| - final IndexOutput payOut; |
| + IndexOutput docOut; |
| + IndexOutput posOut; |
| + IndexOutput payOut; |
| |
| final static IntBlockTermState emptyState = new IntBlockTermState(); |
| IntBlockTermState lastState; |
| @@ -113,7 +114,7 @@ |
| super(); |
| |
| docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), |
| - state.context); |
| + state.context); |
| IndexOutput posOut = null; |
| IndexOutput payOut = null; |
| boolean success = false; |
| @@ -123,7 +124,7 @@ |
| if (state.fieldInfos.hasProx()) { |
| posDeltaBuffer = new int[MAX_DATA_SIZE]; |
| posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), |
| - state.context); |
| + state.context); |
| CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT); |
| |
| if (state.fieldInfos.hasPayloads()) { |
| @@ -144,7 +145,7 @@ |
| |
| if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { |
| payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), |
| - state.context); |
| + state.context); |
| CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT); |
| } |
| } else { |
| @@ -569,6 +570,26 @@ |
| |
| @Override |
| public void close() throws IOException { |
| - IOUtils.close(docOut, posOut, payOut); |
| + // TODO: add a finish() at least to PushBase? DV too...? |
| + boolean success = false; |
| + try { |
| + if (docOut != null) { |
| + CodecUtil.writeFooter(docOut); |
| + } |
| + if (posOut != null) { |
| + CodecUtil.writeFooter(posOut); |
| + } |
| + if (payOut != null) { |
| + CodecUtil.writeFooter(payOut); |
| + } |
| + success = true; |
| + } finally { |
| + if (success) { |
| + IOUtils.close(docOut, posOut, payOut); |
| + } else { |
| + IOUtils.closeWhileHandlingException(docOut, posOut, payOut); |
| + } |
| + docOut = posOut = payOut = null; |
| + } |
| } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (working copy) |
| @@ -68,7 +68,7 @@ |
| * <p>The DocValues metadata or .dvm file.</p> |
| * <p>For DocValues field, this stores metadata, such as the offset into the |
| * DocValues data (.dvd)</p> |
| - * <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup></p> |
| + * <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup>,Footer</p> |
| * <ul> |
| * <li>Entry --> NumericEntry | BinaryEntry | SortedEntry</li> |
| * <li>NumericEntry --> DataOffset,CompressionType,PackedVersion</li> |
| @@ -78,6 +78,7 @@ |
| * <li>DataOffset,DataLength --> {@link DataOutput#writeLong Int64}</li> |
| * <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Sorted fields have two entries: a SortedEntry with the FST metadata, |
| * and an ordinary NumericEntry for the document-to-ord metadata.</p> |
| @@ -105,7 +106,7 @@ |
| * <li><a name="dvd" id="dvd"></a> |
| * <p>The DocValues data or .dvd file.</p> |
| * <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p> |
| - * <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup></p> |
| + * <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>,Footer</p> |
| * <ul> |
| * <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li> |
| * <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li> |
| @@ -114,6 +115,7 @@ |
| * <li>TableCompressedNumerics --> TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li> |
| * <li>UncompressedNumerics --> {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li> |
| * <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>SortedSet entries store the list of ordinals in their BinaryData as a |
| * sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p> |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (working copy) |
| @@ -37,6 +37,7 @@ |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -64,6 +65,7 @@ |
| private final Map<Integer,BinaryEntry> binaries; |
| private final Map<Integer,FSTEntry> fsts; |
| private final IndexInput data; |
| + private final int version; |
| |
| // ram instances we have already loaded |
| private final Map<Integer,NumericDocValues> numericInstances = |
| @@ -89,16 +91,16 @@ |
| |
| static final int VERSION_START = 0; |
| static final int VERSION_GCD_COMPRESSION = 1; |
| - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; |
| + static final int VERSION_CHECKSUM = 2; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { |
| maxDoc = state.segmentInfo.getDocCount(); |
| String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); |
| // read in the entries from the metadata file. |
| - IndexInput in = state.directory.openInput(metaName, state.context); |
| + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); |
| boolean success = false; |
| ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); |
| - final int version; |
| try { |
| version = CodecUtil.checkHeader(in, metaCodec, |
| VERSION_START, |
| @@ -108,8 +110,10 @@ |
| fsts = new HashMap<>(); |
| readFields(in, state.fieldInfos); |
| |
| - if (in.getFilePointer() != in.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(in); |
| + } else { |
| + CodecUtil.checkEOF(in); |
| } |
| |
| success = true; |
| @@ -199,6 +203,13 @@ |
| return ramBytesUsed.get(); |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(data); |
| + } |
| + } |
| + |
| private NumericDocValues loadNumeric(FieldInfo field) throws IOException { |
| NumericEntry entry = numerics.get(field.number); |
| data.seek(entry.offset); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java (working copy) |
| @@ -92,9 +92,7 @@ |
| omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes)); |
| } |
| |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| - } |
| + CodecUtil.checkEOF(input); |
| FieldInfos fieldInfos = new FieldInfos(infos); |
| success = true; |
| return fieldInfos; |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java (working copy) |
| @@ -34,14 +34,12 @@ |
| import org.apache.lucene.util.packed.PackedInts.FormatAndBits; |
| import org.apache.lucene.util.packed.PackedInts; |
| |
| +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT; |
| + |
| /** |
| * Writer for {@link Lucene42NormsFormat} |
| */ |
| -class Lucene42NormsConsumer extends DocValuesConsumer { |
| - static final int VERSION_START = 0; |
| - static final int VERSION_GCD_COMPRESSION = 1; |
| - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; |
| - |
| +class Lucene42NormsConsumer extends DocValuesConsumer { |
| static final byte NUMBER = 0; |
| |
| static final int BLOCK_SIZE = 4096; |
| @@ -51,7 +49,7 @@ |
| static final byte UNCOMPRESSED = 2; |
| static final byte GCD_COMPRESSED = 3; |
| |
| - final IndexOutput data, meta; |
| + IndexOutput data, meta; |
| final int maxDoc; |
| final float acceptableOverheadRatio; |
| |
| @@ -181,7 +179,11 @@ |
| try { |
| if (meta != null) { |
| meta.writeVInt(-1); // write EOF marker |
| + CodecUtil.writeFooter(meta); // write checksum |
| } |
| + if (data != null) { |
| + CodecUtil.writeFooter(data); // write checksum |
| + } |
| success = true; |
| } finally { |
| if (success) { |
| @@ -189,6 +191,7 @@ |
| } else { |
| IOUtils.closeWhileHandlingException(data, meta); |
| } |
| + meta = data = null; |
| } |
| } |
| |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java (working copy) |
| @@ -59,7 +59,7 @@ |
| * {@link BlockPackedWriter blocks of packed ints} for positions.</p> |
| * <p>Here is a more detailed description of the field data file format:</p> |
| * <ul> |
| - * <li>VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk><sup>ChunkCount</sup></li> |
| + * <li>VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk><sup>ChunkCount</sup>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>PackedIntsVersion --> {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li> |
| * <li>ChunkSize is the number of bytes of terms to accumulate before flushing, as a {@link DataOutput#writeVInt VInt}</li> |
| @@ -107,14 +107,16 @@ |
| * <li>FieldTermsAndPayLoads --> Terms (Payloads)</li> |
| * <li>Terms: term bytes</li> |
| * <li>Payloads: payload bytes (if the field has payloads)</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * </li> |
| * <li><a name="vector_index" id="vector_index"></a> |
| * <p>An index file (extension <tt>.tvx</tt>).</p> |
| * <ul> |
| - * <li>VectorIndex (.tvx) --> <Header>, <ChunkIndex></li> |
| + * <li>VectorIndex (.tvx) --> <Header>, <ChunkIndex>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * </li> |
| * </ol> |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java (working copy) |
| @@ -66,7 +66,7 @@ |
| * of indirection: docId -> ord. */ |
| public static final int SORTED_SET_SINGLE_VALUED_SORTED = 1; |
| |
| - final IndexOutput data, meta; |
| + IndexOutput data, meta; |
| final int maxDoc; |
| |
| /** expert: Creates a new writer */ |
| @@ -438,7 +438,11 @@ |
| try { |
| if (meta != null) { |
| meta.writeVInt(-1); // write EOF marker |
| + CodecUtil.writeFooter(meta); // write checksum |
| } |
| + if (data != null) { |
| + CodecUtil.writeFooter(data); // write checksum |
| + } |
| success = true; |
| } finally { |
| if (success) { |
| @@ -446,6 +450,7 @@ |
| } else { |
| IOUtils.closeWhileHandlingException(data, meta); |
| } |
| + meta = data = null; |
| } |
| } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java (working copy) |
| @@ -89,7 +89,7 @@ |
| * <p>The DocValues metadata or .dvm file.</p> |
| * <p>For DocValues field, this stores metadata, such as the offset into the |
| * DocValues data (.dvd)</p> |
| - * <p>DocValues metadata (.dvm) --> Header,<Entry><sup>NumFields</sup></p> |
| + * <p>DocValues metadata (.dvm) --> Header,<Entry><sup>NumFields</sup>,Footer</p> |
| * <ul> |
| * <li>Entry --> NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li> |
| * <li>NumericEntry --> GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li> |
| @@ -109,6 +109,7 @@ |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --> {@link DataOutput#writeLong Int64}</li> |
| * <li>TableSize --> {@link DataOutput#writeVInt vInt}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Sorted fields have two entries: a BinaryEntry with the value metadata, |
| * and an ordinary NumericEntry for the document-to-ord metadata.</p> |
| @@ -138,10 +139,13 @@ |
| * is written for the addresses. |
| * <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field. |
| * If its -1, then there are no missing values. |
| + * <p>Checksum contains the CRC32 checksum of all bytes in the .dvm file up |
| + * until the checksum. This is used to verify integrity of the file on opening the |
| + * index. |
| * <li><a name="dvd" id="dvd"></a> |
| * <p>The DocValues data or .dvd file.</p> |
| * <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p> |
| - * <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup></p> |
| + * <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup>,Footer</p> |
| * <ul> |
| * <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics</li> |
| * <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li> |
| @@ -150,6 +154,7 @@ |
| * <li>TableCompressedNumerics --> {@link PackedInts PackedInts}</li> |
| * <li>GCDCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li> |
| * <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>SortedSet entries store the list of ordinals in their BinaryData as a |
| * sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p> |
| @@ -179,7 +184,8 @@ |
| static final String META_EXTENSION = "dvm"; |
| static final int VERSION_START = 0; |
| static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1; |
| - static final int VERSION_CURRENT = VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED; |
| + static final int VERSION_CHECKSUM = 2; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| static final byte NUMERIC = 0; |
| static final byte BINARY = 1; |
| static final byte SORTED = 2; |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (working copy) |
| @@ -50,6 +50,7 @@ |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.index.TermsEnum.SeekStatus; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -80,7 +81,7 @@ |
| protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { |
| String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); |
| // read in the entries from the metadata file. |
| - IndexInput in = state.directory.openInput(metaName, state.context); |
| + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); |
| this.maxDoc = state.segmentInfo.getDocCount(); |
| boolean success = false; |
| try { |
| @@ -94,8 +95,10 @@ |
| sortedSets = new HashMap<>(); |
| readFields(in, state.fieldInfos); |
| |
| - if (in.getFilePointer() != in.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); |
| + if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(in); |
| + } else { |
| + CodecUtil.checkEOF(in); |
| } |
| |
| success = true; |
| @@ -299,6 +302,13 @@ |
| return ramBytesUsed.get(); |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) { |
| + CodecUtil.checksumEntireFile(data); |
| + } |
| + } |
| + |
| LongValues getNumeric(NumericEntry entry) throws IOException { |
| final IndexInput data = this.data.clone(); |
| data.seek(entry.offset); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java (working copy) |
| @@ -32,7 +32,7 @@ |
| * <p> |
| * <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p> |
| * <p>FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, |
| - * FieldBits,DocValuesBits,DocValuesGen,Attributes> <sup>FieldsCount</sup></p> |
| + * FieldBits,DocValuesBits,DocValuesGen,Attributes> <sup>FieldsCount</sup>,Footer</p> |
| * <p>Data types: |
| * <ul> |
| * <li>Header --> {@link CodecUtil#checkHeader CodecHeader}</li> |
| @@ -42,6 +42,7 @@ |
| * <li>FieldNumber --> {@link DataOutput#writeInt VInt}</li> |
| * <li>Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}</li> |
| * <li>DocValuesGen --> {@link DataOutput#writeLong(long) Int64}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * </p> |
| * Field Descriptions: |
| @@ -113,7 +114,8 @@ |
| // Codec header |
| static final String CODEC_NAME = "Lucene46FieldInfos"; |
| static final int FORMAT_START = 0; |
| - static final int FORMAT_CURRENT = FORMAT_START; |
| + static final int FORMAT_CHECKSUM = 1; |
| + static final int FORMAT_CURRENT = FORMAT_CHECKSUM; |
| |
| // Field flags |
| static final byte IS_INDEXED = 0x1; |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java (working copy) |
| @@ -29,6 +29,7 @@ |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.FieldInfo.DocValuesType; |
| import org.apache.lucene.index.FieldInfo.IndexOptions; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| @@ -49,13 +50,13 @@ |
| @Override |
| public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext context) throws IOException { |
| final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); |
| - IndexInput input = directory.openInput(fileName, context); |
| + ChecksumIndexInput input = directory.openChecksumInput(fileName, context); |
| |
| boolean success = false; |
| try { |
| - CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, |
| - Lucene46FieldInfosFormat.FORMAT_START, |
| - Lucene46FieldInfosFormat.FORMAT_CURRENT); |
| + int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, |
| + Lucene46FieldInfosFormat.FORMAT_START, |
| + Lucene46FieldInfosFormat.FORMAT_CURRENT); |
| |
| final int size = input.readVInt(); //read in the size |
| FieldInfo infos[] = new FieldInfo[size]; |
| @@ -91,9 +92,11 @@ |
| omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes)); |
| infos[i].setDocValuesGen(dvGen); |
| } |
| - |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| + |
| + if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) { |
| + CodecUtil.checkFooter(input); |
| + } else { |
| + CodecUtil.checkEOF(input); |
| } |
| FieldInfos fieldInfos = new FieldInfos(infos); |
| success = true; |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java (working copy) |
| @@ -26,9 +26,9 @@ |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.IndexFileNames; |
| +import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.IOUtils; |
| |
| /** |
| @@ -81,6 +81,7 @@ |
| output.writeLong(fi.getDocValuesGen()); |
| output.writeStringStringMap(fi.attributes()); |
| } |
| + CodecUtil.writeFooter(output); |
| success = true; |
| } finally { |
| if (success) { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (working copy) |
| @@ -31,7 +31,7 @@ |
| * <p> |
| * Files: |
| * <ul> |
| - * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files |
| + * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer |
| * </ul> |
| * </p> |
| * Data types: |
| @@ -43,6 +43,7 @@ |
| * <li>Files --> {@link DataOutput#writeStringSet Set<String>}</li> |
| * <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li> |
| * <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * </p> |
| * Field Descriptions: |
| @@ -53,9 +54,6 @@ |
| * <li>IsCompoundFile records whether the segment is written as a compound file or |
| * not. If this is -1, the segment is not a compound file. If it is 1, the segment |
| * is a compound file.</li> |
| - * <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up |
| - * until the checksum. This is used to verify integrity of the file on opening the |
| - * index.</li> |
| * <li>The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, |
| * for each segment it creates. It includes metadata like the current Lucene |
| * version, OS, Java version, why the segment was created (merge, flush, |
| @@ -89,5 +87,6 @@ |
| public final static String SI_EXTENSION = "si"; |
| static final String CODEC_NAME = "Lucene46SegmentInfo"; |
| static final int VERSION_START = 0; |
| - static final int VERSION_CURRENT = VERSION_START; |
| + static final int VERSION_CHECKSUM = 1; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (working copy) |
| @@ -26,9 +26,9 @@ |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| +import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.IOUtils; |
| |
| /** |
| @@ -46,12 +46,12 @@ |
| @Override |
| public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException { |
| final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION); |
| - final IndexInput input = dir.openInput(fileName, context); |
| + final ChecksumIndexInput input = dir.openChecksumInput(fileName, context); |
| boolean success = false; |
| try { |
| - CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME, |
| - Lucene46SegmentInfoFormat.VERSION_START, |
| - Lucene46SegmentInfoFormat.VERSION_CURRENT); |
| + int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME, |
| + Lucene46SegmentInfoFormat.VERSION_START, |
| + Lucene46SegmentInfoFormat.VERSION_CURRENT); |
| final String version = input.readString(); |
| final int docCount = input.readInt(); |
| if (docCount < 0) { |
| @@ -61,8 +61,10 @@ |
| final Map<String,String> diagnostics = input.readStringStringMap(); |
| final Set<String> files = input.readStringSet(); |
| |
| - if (input.getFilePointer() != input.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); |
| + if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(input); |
| + } else { |
| + CodecUtil.checkEOF(input); |
| } |
| |
| final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (working copy) |
| @@ -59,7 +59,7 @@ |
| output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); |
| output.writeStringStringMap(si.getDiagnostics()); |
| output.writeStringSet(si.files()); |
| - |
| + CodecUtil.writeFooter(output); |
| success = true; |
| } finally { |
| if (!success) { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html (working copy) |
| @@ -383,6 +383,9 @@ |
| <li>In version 4.5, DocValues were extended to explicitly represent missing values.</li> |
| <li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to |
| allow updating NumericDocValues fields.</li> |
| +<li>In version 4.8, checksum footers were added to the end of each index file |
| +for improved data integrity. Specifically, the last 8 bytes of every index file |
| +contain the zlib-crc32 checksum of the file.</li> |
| </ul> |
| <a name="Limitations" id="Limitations"></a> |
| <h2>Limitations</h2> |
| Index: lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (working copy) |
| @@ -310,6 +310,13 @@ |
| } |
| return size; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + for (DocValuesProducer format : formats.values()) { |
| + format.validate(); |
| + } |
| + } |
| } |
| |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (working copy) |
| @@ -246,6 +246,13 @@ |
| } |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + for (FieldsProducer producer : formats.values()) { |
| + producer.validate(); |
| + } |
| + } |
| } |
| |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/index/AtomicReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (working copy) |
| @@ -238,4 +238,12 @@ |
| * synchronization. |
| */ |
| public abstract Bits getLiveDocs(); |
| + |
| + /** |
| + * Validates/runs consistency checks on this reader. |
| + * <p> |
| + * Note that this may be costly in terms of I/O, e.g. |
| + * may involve computing a checksum value against large data files. |
| + */ |
| + public abstract void validate() throws IOException; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy) |
| @@ -536,6 +536,10 @@ |
| reader = new SegmentReader(info, IOContext.DEFAULT); |
| |
| segInfoStat.openReaderPassed = true; |
| + |
| + if (infoStream != null) |
| + infoStream.print(" test: validate reader........."); |
| + reader.validate(); |
| |
| final int numDocs = reader.numDocs(); |
| toLoseDocCount = numDocs; |
| Index: lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (working copy) |
| @@ -423,4 +423,9 @@ |
| return in.getDocsWithField(field); |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + ensureOpen(); |
| + in.validate(); |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) |
| @@ -2657,7 +2657,8 @@ |
| false, codec, null); |
| |
| SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, |
| - MergeState.CheckAbort.NONE, globalFieldNumberMap, context); |
| + MergeState.CheckAbort.NONE, globalFieldNumberMap, |
| + context, config.getValidateAtMerge()); |
| |
| if (!merger.shouldMerge()) { |
| return; |
| @@ -4057,7 +4058,8 @@ |
| // OneMerge to return a view over the actual segments to merge |
| final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), |
| merge.info.info, infoStream, dirWrapper, |
| - checkAbort, globalFieldNumberMap, context); |
| + checkAbort, globalFieldNumberMap, |
| + context, config.getValidateAtMerge()); |
| |
| merge.checkAborted(directory); |
| |
| Index: lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy) |
| @@ -110,6 +110,12 @@ |
| * (set to <code>true</code>). For batch indexing with very large |
| * ram buffers use <code>false</code> */ |
| public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true; |
| + |
| + /** Default value for calling {@link AtomicReader#validate()} before |
| + * merging segments (set to <code>false</code>). You can set this |
| + * to <code>true</code> for additional safety. */ |
| + public final static boolean DEFAULT_VALIDATE_AT_MERGE = false; |
| + |
| /** |
| * Sets the default (for any instance) maximum time to wait for a write lock |
| * (in milliseconds). |
| Index: lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java (working copy) |
| @@ -97,6 +97,9 @@ |
| |
| /** True if segment flushes should use compound file format */ |
| protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; |
| + |
| + /** True if merging should validate segments first */ |
| + protected volatile boolean validateAtMerge = IndexWriterConfig.DEFAULT_VALIDATE_AT_MERGE; |
| |
| // used by IndexWriterConfig |
| LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) { |
| @@ -152,6 +155,7 @@ |
| flushPolicy = config.getFlushPolicy(); |
| perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB(); |
| useCompoundFile = config.getUseCompoundFile(); |
| + validateAtMerge = config.getValidateAtMerge(); |
| } |
| |
| /** Returns the default analyzer to use for indexing documents. */ |
| @@ -475,6 +479,26 @@ |
| return useCompoundFile ; |
| } |
| |
| + /** |
| + * Sets whether {@link IndexWriter} should call {@link AtomicReader#validate()} |
| + * on existing segments before merging them into a new one. |
| + * <p> |
| + * Use <code>true</code> to enable this safety check, which can help |
| + * reduce the risk of propagating index corruption from older segments |
| + * into new ones, at the expense of slower merging. |
| + * </p> |
| + */ |
| + public LiveIndexWriterConfig setValidateAtMerge(boolean validateAtMerge) { |
| + this.validateAtMerge = validateAtMerge; |
| + return this; |
| + } |
| + |
| + /** Returns true if {@link AtomicReader#validate()} is called before |
| + * merging segments. */ |
| + public boolean getValidateAtMerge() { |
| + return validateAtMerge; |
| + } |
| + |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| @@ -499,6 +523,7 @@ |
| sb.append("readerPooling=").append(getReaderPooling()).append("\n"); |
| sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n"); |
| sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n"); |
| + sb.append("validateAtMerge=").append(getValidateAtMerge()).append("\n"); |
| return sb.toString(); |
| } |
| |
| Index: lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (working copy) |
| @@ -299,4 +299,12 @@ |
| NumericDocValues values = reader == null ? null : reader.getNormValues(field); |
| return values; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + ensureOpen(); |
| + for (AtomicReader reader : completeReaderSet) { |
| + reader.validate(); |
| + } |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (working copy) |
| @@ -95,14 +95,4 @@ |
| IOUtils.reThrow(t); |
| } |
| } |
| - |
| - /** Returns approximate RAM bytes used. */ |
| - synchronized long ramBytesUsed() { |
| - long ramBytesUsed = 0; |
| - for (RefCount<DocValuesProducer> dvp : genDVProducers.values()) { |
| - ramBytesUsed += dvp.get().ramBytesUsed(); |
| - } |
| - return ramBytesUsed; |
| - } |
| - |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (working copy) |
| @@ -36,11 +36,9 @@ |
| import org.apache.lucene.codecs.FieldInfosFormat; |
| import org.apache.lucene.codecs.LiveDocsFormat; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| -import org.apache.lucene.store.ChecksumIndexOutput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| -import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.NoSuchDirectoryException; |
| import org.apache.lucene.util.IOUtils; |
| @@ -69,10 +67,10 @@ |
| * <p> |
| * Files: |
| * <ul> |
| - * <li><tt>segments.gen</tt>: GenHeader, Generation, Generation |
| + * <li><tt>segments.gen</tt>: GenHeader, Generation, Generation, Footer |
| * <li><tt>segments_N</tt>: Header, Version, NameCounter, SegCount, |
| * <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles><sup>SegCount</sup>, |
| - * CommitUserData, Checksum |
| + * CommitUserData, Footer |
| * </ul> |
| * </p> |
| * Data types: |
| @@ -84,6 +82,7 @@ |
| * <li>SegName, SegCodec --> {@link DataOutput#writeString String}</li> |
| * <li>CommitUserData --> {@link DataOutput#writeStringStringMap Map<String,String>}</li> |
| * <li>UpdatesFiles --> {@link DataOutput#writeStringSet(Set) Set<String>}</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * </p> |
| * Field Descriptions: |
| @@ -98,9 +97,6 @@ |
| * there are no deletes. Anything above zero means there are deletes |
| * stored by {@link LiveDocsFormat}.</li> |
| * <li>DeletionCount records the number of deleted documents in this segment.</li> |
| - * <li>Checksum contains the CRC32 checksum of all bytes in the segments_N file up |
| - * until the checksum. This is used to verify integrity of the file on opening the |
| - * index.</li> |
| * <li>SegCodec is the {@link Codec#getName() name} of the Codec that encoded |
| * this segment.</li> |
| * <li>CommitUserData stores an optional user-supplied opaque |
| @@ -122,10 +118,17 @@ |
| |
| /** The file format version for the segments_N codec header, since 4.6+. */ |
| public static final int VERSION_46 = 1; |
| + |
| + /** The file format version for the segments_N codec header, since 4.8+. */ |
| + public static final int VERSION_48 = 2; |
| |
| - /** Used for the segments.gen file only! |
| - * Whenever you add a new format, make it 1 smaller (negative version logic)! */ |
| - public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2; |
| + // Used for the segments.gen file only! |
| + // Whenever you add a new format, make it 1 smaller (negative version logic)! |
| + private static final int FORMAT_SEGMENTS_GEN_47 = -2; |
| + private static final int FORMAT_SEGMENTS_GEN_CHECKSUM = -3; |
| + private static final int FORMAT_SEGMENTS_GEN_START = FORMAT_SEGMENTS_GEN_47; |
| + /** Current format of segments.gen */ |
| + public static final int FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM; |
| |
| /** Used to name new segments. */ |
| public int counter; |
| @@ -266,6 +269,7 @@ |
| genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT); |
| genOutput.writeLong(generation); |
| genOutput.writeLong(generation); |
| + CodecUtil.writeFooter(genOutput); |
| } finally { |
| genOutput.close(); |
| dir.sync(Collections.singleton(IndexFileNames.SEGMENTS_GEN)); |
| @@ -317,7 +321,7 @@ |
| |
| lastGeneration = generation; |
| |
| - ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ)); |
| + ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ); |
| try { |
| // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need |
| // to read the magic ourselves. |
| @@ -326,7 +330,7 @@ |
| throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); |
| } |
| // 4.0+ |
| - int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_46); |
| + int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_48); |
| version = input.readLong(); |
| counter = input.readInt(); |
| int numSegments = input.readInt(); |
| @@ -366,10 +370,15 @@ |
| } |
| userData = input.readStringStringMap(); |
| |
| - final long checksumNow = input.getChecksum(); |
| - final long checksumThen = input.readLong(); |
| - if (checksumNow != checksumThen) { |
| - throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")"); |
| + if (format >= VERSION_48) { |
| + CodecUtil.checkFooter(input); |
| + } else { |
| + final long checksumNow = input.getChecksum(); |
| + final long checksumThen = input.readLong(); |
| + if (checksumNow != checksumThen) { |
| + throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")"); |
| + } |
| + CodecUtil.checkEOF(input); |
| } |
| |
| success = true; |
| @@ -402,7 +411,7 @@ |
| |
| // Only non-null after prepareCommit has been called and |
| // before finishCommit is called |
| - ChecksumIndexOutput pendingSegnOutput; |
| + IndexOutput pendingSegnOutput; |
| |
| private void write(Directory directory) throws IOException { |
| |
| @@ -415,12 +424,12 @@ |
| generation++; |
| } |
| |
| - ChecksumIndexOutput segnOutput = null; |
| + IndexOutput segnOutput = null; |
| boolean success = false; |
| |
| try { |
| - segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName, IOContext.DEFAULT)); |
| - CodecUtil.writeHeader(segnOutput, "segments", VERSION_46); |
| + segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT); |
| + CodecUtil.writeHeader(segnOutput, "segments", VERSION_48); |
| segnOutput.writeLong(version); |
| segnOutput.writeInt(counter); // write counter |
| segnOutput.writeInt(size()); // write infos |
| @@ -641,9 +650,9 @@ |
| // a stale cache (NFS) we have a better chance of |
| // getting the right generation. |
| long genB = -1; |
| - IndexInput genInput = null; |
| + ChecksumIndexInput genInput = null; |
| try { |
| - genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE); |
| + genInput = directory.openChecksumInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE); |
| } catch (IOException e) { |
| if (infoStream != null) { |
| message("segments.gen open: IOException " + e); |
| @@ -653,18 +662,23 @@ |
| if (genInput != null) { |
| try { |
| int version = genInput.readInt(); |
| - if (version == FORMAT_SEGMENTS_GEN_CURRENT) { |
| + if (version == FORMAT_SEGMENTS_GEN_47 || version == FORMAT_SEGMENTS_GEN_CHECKSUM) { |
| long gen0 = genInput.readLong(); |
| long gen1 = genInput.readLong(); |
| if (infoStream != null) { |
| message("fallback check: " + gen0 + "; " + gen1); |
| } |
| + if (version == FORMAT_SEGMENTS_GEN_CHECKSUM) { |
| + CodecUtil.checkFooter(genInput); |
| + } else { |
| + CodecUtil.checkEOF(genInput); |
| + } |
| if (gen0 == gen1) { |
| // The file is consistent. |
| genB = gen0; |
| } |
| } else { |
| - throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT); |
| + throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_START, FORMAT_SEGMENTS_GEN_CURRENT); |
| } |
| } catch (IOException err2) { |
| // rethrow any format exception |
| @@ -863,7 +877,7 @@ |
| } |
| boolean success = false; |
| try { |
| - pendingSegnOutput.finishCommit(); |
| + CodecUtil.writeFooter(pendingSegnOutput); |
| success = true; |
| } finally { |
| if (!success) { |
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) |
| @@ -52,7 +52,13 @@ |
| |
| // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!! |
| SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, |
| - MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException { |
| + MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException { |
| + // validate incoming readers |
| + if (validate) { |
| + for (AtomicReader reader : readers) { |
| + reader.validate(); |
| + } |
| + } |
| mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort); |
| directory = dir; |
| this.codec = segmentInfo.getCodec(); |
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) |
| @@ -33,10 +33,13 @@ |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| +import java.util.Collections; |
| import java.util.HashMap; |
| +import java.util.IdentityHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| +import java.util.Set; |
| |
| /** |
| * IndexReader implementation over a single segment. |
| @@ -72,7 +75,8 @@ |
| } |
| }; |
| |
| - final Map<String,DocValuesProducer> dvProducers = new HashMap<>(); |
| + final Map<String,DocValuesProducer> dvProducersByField = new HashMap<>(); |
| + final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>()); |
| |
| final FieldInfos fieldInfos; |
| |
| @@ -177,12 +181,15 @@ |
| |
| // System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gens=" + genInfos.keySet()); |
| |
| + // TODO: can we avoid iterating over fieldinfos several times and creating maps of all this stuff if dv updates do not exist? |
| + |
| for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) { |
| Long gen = e.getKey(); |
| List<FieldInfo> infos = e.getValue(); |
| DocValuesProducer dvp = segDocValues.getDocValuesProducer(gen, si, IOContext.READ, dir, dvFormat, infos); |
| for (FieldInfo fi : infos) { |
| - dvProducers.put(fi.name, dvp); |
| + dvProducersByField.put(fi.name, dvp); |
| + dvProducers.add(dvp); |
| } |
| } |
| |
| @@ -250,7 +257,7 @@ |
| try { |
| core.decRef(); |
| } finally { |
| - dvProducers.clear(); |
| + dvProducersByField.clear(); |
| try { |
| IOUtils.close(docValuesLocal, docsWithFieldLocal); |
| } finally { |
| @@ -395,13 +402,12 @@ |
| return null; |
| } |
| |
| - DocValuesProducer dvProducer = dvProducers.get(field); |
| - assert dvProducer != null; |
| - |
| Map<String,Object> dvFields = docValuesLocal.get(); |
| |
| NumericDocValues dvs = (NumericDocValues) dvFields.get(field); |
| if (dvs == null) { |
| + DocValuesProducer dvProducer = dvProducersByField.get(field); |
| + assert dvProducer != null; |
| dvs = dvProducer.getNumeric(fi); |
| dvFields.put(field, dvs); |
| } |
| @@ -422,13 +428,12 @@ |
| return null; |
| } |
| |
| - DocValuesProducer dvProducer = dvProducers.get(field); |
| - assert dvProducer != null; |
| - |
| Map<String,Bits> dvFields = docsWithFieldLocal.get(); |
| |
| Bits dvs = dvFields.get(field); |
| if (dvs == null) { |
| + DocValuesProducer dvProducer = dvProducersByField.get(field); |
| + assert dvProducer != null; |
| dvs = dvProducer.getDocsWithField(fi); |
| dvFields.put(field, dvs); |
| } |
| @@ -444,13 +449,12 @@ |
| return null; |
| } |
| |
| - DocValuesProducer dvProducer = dvProducers.get(field); |
| - assert dvProducer != null; |
| - |
| Map<String,Object> dvFields = docValuesLocal.get(); |
| |
| BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field); |
| if (dvs == null) { |
| + DocValuesProducer dvProducer = dvProducersByField.get(field); |
| + assert dvProducer != null; |
| dvs = dvProducer.getBinary(fi); |
| dvFields.put(field, dvs); |
| } |
| @@ -466,13 +470,12 @@ |
| return null; |
| } |
| |
| - DocValuesProducer dvProducer = dvProducers.get(field); |
| - assert dvProducer != null; |
| - |
| Map<String,Object> dvFields = docValuesLocal.get(); |
| |
| SortedDocValues dvs = (SortedDocValues) dvFields.get(field); |
| if (dvs == null) { |
| + DocValuesProducer dvProducer = dvProducersByField.get(field); |
| + assert dvProducer != null; |
| dvs = dvProducer.getSorted(fi); |
| dvFields.put(field, dvs); |
| } |
| @@ -488,13 +491,12 @@ |
| return null; |
| } |
| |
| - DocValuesProducer dvProducer = dvProducers.get(field); |
| - assert dvProducer != null; |
| - |
| Map<String,Object> dvFields = docValuesLocal.get(); |
| |
| SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field); |
| if (dvs == null) { |
| + DocValuesProducer dvProducer = dvProducersByField.get(field); |
| + assert dvProducer != null; |
| dvs = dvProducer.getSortedSet(fi); |
| dvFields.put(field, dvs); |
| } |
| @@ -548,8 +550,10 @@ |
| public long ramBytesUsed() { |
| ensureOpen(); |
| long ramBytesUsed = 0; |
| - if (segDocValues != null) { |
| - ramBytesUsed += segDocValues.ramBytesUsed(); |
| + if (dvProducers != null) { |
| + for (DocValuesProducer producer : dvProducers) { |
| + ramBytesUsed += producer.ramBytesUsed(); |
| + } |
| } |
| if (core != null) { |
| ramBytesUsed += core.ramBytesUsed(); |
| @@ -556,4 +560,35 @@ |
| } |
| return ramBytesUsed; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + ensureOpen(); |
| + |
| + // stored fields |
| + getFieldsReader().validate(); |
| + |
| + // term vectors |
| + TermVectorsReader termVectorsReader = getTermVectorsReader(); |
| + if (termVectorsReader != null) { |
| + termVectorsReader.validate(); |
| + } |
| + |
| + // terms/postings |
| + if (core.fields != null) { |
| + core.fields.validate(); |
| + } |
| + |
| + // norms |
| + if (core.normsProducer != null) { |
| + core.normsProducer.validate(); |
| + } |
| + |
| + // docvalues |
| + if (dvProducers != null) { |
| + for (DocValuesProducer producer : dvProducers) { |
| + producer.validate(); |
| + } |
| + } |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java (working copy) |
| @@ -239,4 +239,12 @@ |
| // TODO: as this is a wrapper, should we really close the delegate? |
| in.close(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + ensureOpen(); |
| + for (AtomicReaderContext ctx : in.leaves()) { |
| + ctx.reader().validate(); |
| + } |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java (revision 0) |
| +++ lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java (working copy) |
| @@ -0,0 +1,84 @@ |
| +package org.apache.lucene.store; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.util.zip.Checksum; |
| + |
| +/** |
| + * Wraps another {@link Checksum} with an internal buffer |
| + * to speed up checksum calculations. |
| + */ |
| +public class BufferedChecksum implements Checksum { |
| + private final Checksum in; |
| + private final byte buffer[]; |
| + private int upto; |
| + /** Default buffer size: 256 */ |
| + public static final int DEFAULT_BUFFERSIZE = 256; |
| + |
| + /** Create a new BufferedChecksum with {@link #DEFAULT_BUFFERSIZE} */ |
| + public BufferedChecksum(Checksum in) { |
| + this(in, DEFAULT_BUFFERSIZE); |
| + } |
| + |
| + /** Create a new BufferedChecksum with the specified bufferSize */ |
| + public BufferedChecksum(Checksum in, int bufferSize) { |
| + this.in = in; |
| + this.buffer = new byte[bufferSize]; |
| + } |
| + |
| + @Override |
| + public void update(int b) { |
| + if (upto == buffer.length) { |
| + flush(); |
| + } |
| + buffer[upto++] = (byte) b; |
| + } |
| + |
| + @Override |
| + public void update(byte[] b, int off, int len) { |
| + if (len >= buffer.length) { |
| + flush(); |
| + in.update(b, off, len); |
| + } else { |
| + if (upto + len > buffer.length) { |
| + flush(); |
| + } |
| + System.arraycopy(b, off, buffer, upto, len); |
| + upto += len; |
| + } |
| + } |
| + |
| + @Override |
| + public long getValue() { |
| + flush(); |
| + return in.getValue(); |
| + } |
| + |
| + @Override |
| + public void reset() { |
| + upto = 0; |
| + in.reset(); |
| + } |
| + |
| + private void flush() { |
| + if (upto > 0) { |
| + in.update(buffer, 0, upto); |
| + } |
| + upto = 0; |
| + } |
| +} |
| |
| Property changes on: lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java (revision 0) |
| +++ lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java (working copy) |
| @@ -0,0 +1,72 @@ |
| +package org.apache.lucene.store; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.zip.CRC32; |
| +import java.util.zip.Checksum; |
| + |
| +/** |
| + * Simple implementation of {@link ChecksumIndexInput} that wraps |
| + * another input and delegates calls. |
| + */ |
| +public class BufferedChecksumIndexInput extends ChecksumIndexInput { |
| + final IndexInput main; |
| + final Checksum digest; |
| + |
| + /** Creates a new BufferedChecksumIndexInput */ |
| + public BufferedChecksumIndexInput(IndexInput main) { |
| + super("BufferedChecksumIndexInput(" + main + ")"); |
| + this.main = main; |
| + this.digest = new BufferedChecksum(new CRC32()); |
| + } |
| + |
| + @Override |
| + public byte readByte() throws IOException { |
| + final byte b = main.readByte(); |
| + digest.update(b); |
| + return b; |
| + } |
| + |
| + @Override |
| + public void readBytes(byte[] b, int offset, int len) |
| + throws IOException { |
| + main.readBytes(b, offset, len); |
| + digest.update(b, offset, len); |
| + } |
| + |
| + @Override |
| + public long getChecksum() { |
| + return digest.getValue(); |
| + } |
| + |
| + @Override |
| + public void close() throws IOException { |
| + main.close(); |
| + } |
| + |
| + @Override |
| + public long getFilePointer() { |
| + return main.getFilePointer(); |
| + } |
| + |
| + @Override |
| + public long length() { |
| + return main.length(); |
| + } |
| +} |
| |
| Property changes on: lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java (working copy) |
| @@ -18,6 +18,7 @@ |
| */ |
| |
| import java.io.IOException; |
| +import java.util.zip.CRC32; |
| |
| /** Base implementation class for buffered {@link IndexOutput}. */ |
| public abstract class BufferedIndexOutput extends IndexOutput { |
| @@ -28,6 +29,7 @@ |
| private final byte[] buffer; |
| private long bufferStart = 0; // position in file of buffer |
| private int bufferPosition = 0; // position in buffer |
| + private final CRC32 crc = new CRC32(); |
| |
| /** |
| * Creates a new {@link BufferedIndexOutput} with the default buffer size |
| @@ -75,6 +77,7 @@ |
| if (bufferPosition > 0) |
| flush(); |
| // and write data at once |
| + crc.update(b, offset, length); |
| flushBuffer(b, offset, length); |
| bufferStart += length; |
| } else { |
| @@ -99,6 +102,7 @@ |
| |
| @Override |
| public void flush() throws IOException { |
| + crc.update(buffer, 0, bufferPosition); |
| flushBuffer(buffer, bufferPosition); |
| bufferStart += bufferPosition; |
| bufferPosition = 0; |
| @@ -141,4 +145,9 @@ |
| return bufferSize; |
| } |
| |
| + @Override |
| + public long getChecksum() throws IOException { |
| + flush(); |
| + return crc.getValue(); |
| + } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (working copy) |
| @@ -1,5 +1,7 @@ |
| package org.apache.lucene.store; |
| |
| +import java.io.IOException; |
| + |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -17,61 +19,24 @@ |
| * limitations under the License. |
| */ |
| |
| -import java.io.IOException; |
| -import java.util.zip.CRC32; |
| -import java.util.zip.Checksum; |
| - |
| -/** Reads bytes through to a primary IndexInput, computing |
| - * checksum as it goes. Note that you cannot use seek(). |
| - * |
| - * @lucene.internal |
| +/** |
| + * Extension of IndexInput, computing checksum as it goes. |
| + * Callers can retrieve the checksum via {@link #getChecksum()}. |
| */ |
| -public class ChecksumIndexInput extends IndexInput { |
| - IndexInput main; |
| - Checksum digest; |
| - |
| - public ChecksumIndexInput(IndexInput main) { |
| - super("ChecksumIndexInput(" + main + ")"); |
| - this.main = main; |
| - digest = new CRC32(); |
| - } |
| - |
| - @Override |
| - public byte readByte() throws IOException { |
| - final byte b = main.readByte(); |
| - digest.update(b); |
| - return b; |
| - } |
| - |
| - @Override |
| - public void readBytes(byte[] b, int offset, int len) |
| - throws IOException { |
| - main.readBytes(b, offset, len); |
| - digest.update(b, offset, len); |
| - } |
| - |
| +public abstract class ChecksumIndexInput extends IndexInput { |
| |
| - public long getChecksum() { |
| - return digest.getValue(); |
| + /** resourceDescription should be a non-null, opaque string |
| + * describing this resource; it's returned from |
| + * {@link #toString}. */ |
| + protected ChecksumIndexInput(String resourceDescription) { |
| + super(resourceDescription); |
| } |
| |
| - @Override |
| - public void close() throws IOException { |
| - main.close(); |
| - } |
| + /** Returns the current checksum value */ |
| + public abstract long getChecksum() throws IOException; |
| |
| @Override |
| - public long getFilePointer() { |
| - return main.getFilePointer(); |
| - } |
| - |
| - @Override |
| public void seek(long pos) { |
| throw new UnsupportedOperationException(); |
| } |
| - |
| - @Override |
| - public long length() { |
| - return main.length(); |
| - } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java (working copy) |
| @@ -1,78 +0,0 @@ |
| -package org.apache.lucene.store; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.zip.CRC32; |
| -import java.util.zip.Checksum; |
| - |
| -/** Writes bytes through to a primary IndexOutput, computing |
| - * checksum. |
| - * |
| - * @lucene.internal |
| - */ |
| -public class ChecksumIndexOutput extends IndexOutput { |
| - IndexOutput main; |
| - Checksum digest; |
| - |
| - public ChecksumIndexOutput(IndexOutput main) { |
| - this.main = main; |
| - digest = new CRC32(); |
| - } |
| - |
| - @Override |
| - public void writeByte(byte b) throws IOException { |
| - digest.update(b); |
| - main.writeByte(b); |
| - } |
| - |
| - @Override |
| - public void writeBytes(byte[] b, int offset, int length) throws IOException { |
| - digest.update(b, offset, length); |
| - main.writeBytes(b, offset, length); |
| - } |
| - |
| - public long getChecksum() { |
| - return digest.getValue(); |
| - } |
| - |
| - @Override |
| - public void flush() throws IOException { |
| - main.flush(); |
| - } |
| - |
| - @Override |
| - public void close() throws IOException { |
| - main.close(); |
| - } |
| - |
| - @Override |
| - public long getFilePointer() { |
| - return main.getFilePointer(); |
| - } |
| - |
| - /** writes the checksum */ |
| - public void finishCommit() throws IOException { |
| - main.writeLong(getChecksum()); |
| - } |
| - |
| - @Override |
| - public long length() throws IOException { |
| - return main.length(); |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java (working copy) |
| @@ -52,14 +52,15 @@ |
| * </ul> |
| * <p>Description:</p> |
| * <ul> |
| - * <li>Compound (.cfs) --> Header, FileData <sup>FileCount</sup></li> |
| + * <li>Compound (.cfs) --> Header, FileData <sup>FileCount</sup>, Footer</li> |
| * <li>Compound Entry Table (.cfe) --> Header, FileCount, <FileName, |
| * DataOffset, DataLength> <sup>FileCount</sup></li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>FileCount --> {@link DataOutput#writeVInt VInt}</li> |
| - * <li>DataOffset,DataLength --> {@link DataOutput#writeLong UInt64}</li> |
| + * <li>DataOffset,DataLength,Checksum --> {@link DataOutput#writeLong UInt64}</li> |
| * <li>FileName --> {@link DataOutput#writeString String}</li> |
| * <li>FileData --> raw file data</li> |
| + * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| @@ -87,6 +88,7 @@ |
| private static final Map<String,FileEntry> SENTINEL = Collections.emptyMap(); |
| private final CompoundFileWriter writer; |
| private final IndexInputSlicer handle; |
| + private int version; |
| |
| /** |
| * Create a new CompoundFileDirectory. |
| @@ -120,15 +122,15 @@ |
| } |
| |
| /** Helper method that reads CFS entries from an input stream */ |
| - private static final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException { |
| + private final Map<String, FileEntry> readEntries(Directory dir, String name) throws IOException { |
| IOException priorE = null; |
| - IndexInput entriesStream = null; |
| + ChecksumIndexInput entriesStream = null; |
| try { |
| final String entriesFileName = IndexFileNames.segmentFileName( |
| IndexFileNames.stripExtension(name), "", |
| IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); |
| - entriesStream = dir.openInput(entriesFileName, IOContext.READONCE); |
| - CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_START); |
| + entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE); |
| + version = CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT); |
| final int numEntries = entriesStream.readVInt(); |
| final Map<String, FileEntry> mapping = new HashMap<>(numEntries); |
| for (int i = 0; i < numEntries; i++) { |
| @@ -141,8 +143,10 @@ |
| fileEntry.offset = entriesStream.readLong(); |
| fileEntry.length = entriesStream.readLong(); |
| } |
| - if (entriesStream.getFilePointer() != entriesStream.length()) { |
| - throw new CorruptIndexException("did not read all bytes from file \"" + entriesFileName + "\": read " + entriesStream.getFilePointer() + " vs size " + entriesStream.length() + " (resource: " + entriesStream + ")"); |
| + if (version >= CompoundFileWriter.VERSION_CHECKSUM) { |
| + CodecUtil.checkFooter(entriesStream); |
| + } else { |
| + CodecUtil.checkEOF(entriesStream); |
| } |
| return mapping; |
| } catch (IOException ioe) { |
| Index: lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java (working copy) |
| @@ -54,7 +54,8 @@ |
| // versioning for the .cfs file |
| static final String DATA_CODEC = "CompoundFileWriterData"; |
| static final int VERSION_START = 0; |
| - static final int VERSION_CURRENT = VERSION_START; |
| + static final int VERSION_CHECKSUM = 1; |
| + static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
| // versioning for the .cfe file |
| static final String ENTRY_CODEC = "CompoundFileWriterEntries"; |
| @@ -140,6 +141,7 @@ |
| // open the compound stream |
| getOutput(); |
| assert dataOut != null; |
| + CodecUtil.writeFooter(dataOut); |
| } catch (IOException e) { |
| priorException = e; |
| } finally { |
| @@ -202,6 +204,7 @@ |
| entryOut.writeLong(fe.offset); |
| entryOut.writeLong(fe.length); |
| } |
| + CodecUtil.writeFooter(entryOut); |
| } |
| |
| IndexOutput createOutput(String name, IOContext context) throws IOException { |
| @@ -342,6 +345,11 @@ |
| writtenBytes += length; |
| delegate.writeBytes(b, offset, length); |
| } |
| + |
| + @Override |
| + public long getChecksum() throws IOException { |
| + return delegate.getChecksum(); |
| + } |
| } |
| |
| } |
| Index: lucene/core/src/java/org/apache/lucene/store/Directory.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/Directory.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/Directory.java (working copy) |
| @@ -100,8 +100,13 @@ |
| * <p>Throws {@link FileNotFoundException} or {@link NoSuchFileException} |
| * if the file does not exist. |
| */ |
| - public abstract IndexInput openInput(String name, IOContext context) throws IOException; |
| + public abstract IndexInput openInput(String name, IOContext context) throws IOException; |
| |
| + /** Returns a stream reading an existing file, computing checksum as it reads */ |
| + public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException { |
| + return new BufferedChecksumIndexInput(openInput(name, context)); |
| + } |
| + |
| /** Construct a {@link Lock}. |
| * @param name the name of the lock file |
| */ |
| Index: lucene/core/src/java/org/apache/lucene/store/IndexOutput.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/IndexOutput.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/IndexOutput.java (working copy) |
| @@ -43,6 +43,8 @@ |
| */ |
| public abstract long getFilePointer(); |
| |
| + /** Returns the current checksum of bytes written so far */ |
| + public abstract long getChecksum() throws IOException; |
| |
| /** The number of bytes in the file. */ |
| public abstract long length() throws IOException; |
| Index: lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java (revision 1583220) |
| +++ lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java (working copy) |
| @@ -18,6 +18,8 @@ |
| */ |
| |
| import java.io.IOException; |
| +import java.util.zip.CRC32; |
| +import java.util.zip.Checksum; |
| |
| /** |
| * A memory-resident {@link IndexOutput} implementation. |
| @@ -35,6 +37,8 @@ |
| private int bufferPosition; |
| private long bufferStart; |
| private int bufferLength; |
| + |
| + private Checksum crc = new BufferedChecksum(new CRC32()); |
| |
| /** Construct an empty output buffer. */ |
| public RAMOutputStream() { |
| @@ -95,6 +99,7 @@ |
| bufferStart = 0; |
| bufferLength = 0; |
| file.setLength(0); |
| + crc.reset(); |
| } |
| |
| @Override |
| @@ -113,6 +118,7 @@ |
| currentBufferIndex++; |
| switchCurrentBuffer(); |
| } |
| + crc.update(b); |
| currentBuffer[bufferPosition++] = b; |
| } |
| |
| @@ -119,6 +125,7 @@ |
| @Override |
| public void writeBytes(byte[] b, int offset, int len) throws IOException { |
| assert b != null; |
| + crc.update(b, offset, len); |
| while (len > 0) { |
| if (bufferPosition == bufferLength) { |
| currentBufferIndex++; |
| @@ -165,5 +172,10 @@ |
| /** Returns byte usage of all buffers. */ |
| public long sizeInBytes() { |
| return (long) file.numBuffers() * (long) BUFFER_SIZE; |
| - } |
| + } |
| + |
| + @Override |
| + public long getChecksum() throws IOException { |
| + return crc.getValue(); |
| + } |
| } |
| Index: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java (revision 0) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java (working copy) |
| @@ -0,0 +1,90 @@ |
| +package org.apache.lucene.index; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.analysis.MockAnalyzer; |
| +import org.apache.lucene.codecs.CodecUtil; |
| +import org.apache.lucene.codecs.lucene46.Lucene46Codec; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.NumericDocValuesField; |
| +import org.apache.lucene.store.CompoundFileDirectory; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.IOUtils; |
| +import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util.TestUtil; |
| + |
| +/** |
| + * Test that a plain default codec configuration puts CRC32 footers in all files. |
| + */ |
| +public class TestAllFilesHaveChecksumFooter extends LuceneTestCase { |
| + public void test() throws Exception { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| + conf.setCodec(new Lucene46Codec()); |
| + RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); |
| + Document doc = new Document(); |
| + // these fields should sometimes get term vectors, etc |
| + Field idField = newStringField("id", "", Field.Store.NO); |
| + Field bodyField = newTextField("body", "", Field.Store.NO); |
| + Field dvField = new NumericDocValuesField("dv", 5); |
| + doc.add(idField); |
| + doc.add(bodyField); |
| + doc.add(dvField); |
| + for (int i = 0; i < 100; i++) { |
| + idField.setStringValue(Integer.toString(i)); |
| + bodyField.setStringValue(TestUtil.randomUnicodeString(random())); |
| + riw.addDocument(doc); |
| + if (random().nextInt(7) == 0) { |
| + riw.commit(); |
| + } |
| + if (random().nextInt(20) == 0) { |
| + riw.deleteDocuments(new Term("id", Integer.toString(i))); |
| + } |
| + } |
| + riw.close(); |
| + checkHeaders(dir); |
| + dir.close(); |
| + } |
| + |
| + private void checkHeaders(Directory dir) throws IOException { |
| + for (String file : dir.listAll()) { |
| + if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) { |
| + CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false); |
| + checkHeaders(cfsDir); // recurse into cfs |
| + cfsDir.close(); |
| + } |
| + IndexInput in = null; |
| + boolean success = false; |
| + try { |
| + in = dir.openInput(file, newIOContext(random())); |
| + CodecUtil.checksumEntireFile(in); |
| + success = true; |
| + } finally { |
| + if (success) { |
| + IOUtils.close(in); |
| + } else { |
| + IOUtils.closeWhileHandlingException(in); |
| + } |
| + } |
| + } |
| + } |
| +} |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (revision 1583220) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (working copy) |
| @@ -24,6 +24,7 @@ |
| import org.apache.lucene.codecs.lucene46.Lucene46Codec; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.NumericDocValuesField; |
| import org.apache.lucene.store.CompoundFileDirectory; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IndexInput; |
| @@ -39,14 +40,15 @@ |
| Directory dir = newDirectory(); |
| IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| conf.setCodec(new Lucene46Codec()); |
| - // riw should sometimes create docvalues fields, etc |
| RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); |
| Document doc = new Document(); |
| // these fields should sometimes get term vectors, etc |
| Field idField = newStringField("id", "", Field.Store.NO); |
| Field bodyField = newTextField("body", "", Field.Store.NO); |
| + Field dvField = new NumericDocValuesField("dv", 5); |
| doc.add(idField); |
| doc.add(bodyField); |
| + doc.add(dvField); |
| for (int i = 0; i < 100; i++) { |
| idField.setStringValue(Integer.toString(i)); |
| bodyField.setStringValue(TestUtil.randomUnicodeString(random())); |
| @@ -54,6 +56,10 @@ |
| if (random().nextInt(7) == 0) { |
| riw.commit(); |
| } |
| + // TODO: we should make a new format with a clean header... |
| + // if (random().nextInt(20) == 0) { |
| + // riw.deleteDocuments(new Term("id", Integer.toString(i))); |
| + // } |
| } |
| riw.close(); |
| checkHeaders(dir); |
| Index: lucene/core/src/test/org/apache/lucene/index/TestDoc.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestDoc.java (revision 1583220) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestDoc.java (working copy) |
| @@ -221,7 +221,7 @@ |
| |
| SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2), |
| si, InfoStream.getDefault(), trackingDir, |
| - MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context); |
| + MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true); |
| |
| MergeState mergeState = merger.merge(); |
| r1.close(); |
| Index: lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1583220) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) |
| @@ -83,7 +83,7 @@ |
| |
| SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(reader1, reader2), |
| si, InfoStream.getDefault(), mergedDir, |
| - MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random())); |
| + MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()), true); |
| MergeState mergeState = merger.merge(); |
| int docsMerged = mergeState.segmentInfo.getDocCount(); |
| assertTrue(docsMerged == 2); |
| Index: lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java (revision 0) |
| +++ lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java (working copy) |
| @@ -0,0 +1,68 @@ |
| +package org.apache.lucene.store; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.util.zip.CRC32; |
| +import java.util.zip.Checksum; |
| + |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +public class TestBufferedChecksum extends LuceneTestCase { |
| + |
| + public void testSimple() { |
| + Checksum c = new BufferedChecksum(new CRC32()); |
| + c.update(1); |
| + c.update(2); |
| + c.update(3); |
| + assertEquals(1438416925L, c.getValue()); |
| + } |
| + |
| + public void testRandom() { |
| + Checksum c1 = new CRC32(); |
| + Checksum c2 = new BufferedChecksum(new CRC32()); |
| + int iterations = atLeast(10000); |
| + for (int i = 0; i < iterations; i++) { |
| + switch(random().nextInt(4)) { |
| + case 0: |
| + // update(byte[], int, int) |
| + int length = random().nextInt(1024); |
| + byte bytes[] = new byte[length]; |
| + random().nextBytes(bytes); |
| + c1.update(bytes, 0, bytes.length); |
| + c2.update(bytes, 0, bytes.length); |
| + break; |
| + case 1: |
| + // update(int) |
| + int b = random().nextInt(256); |
| + c1.update(b); |
| + c2.update(b); |
| + break; |
| + case 2: |
| + // reset() |
| + c1.reset(); |
| + c2.reset(); |
| + break; |
| + case 3: |
| + // getValue() |
| + assertEquals(c1.getValue(), c2.getValue()); |
| + break; |
| + } |
| + } |
| + assertEquals(c1.getValue(), c2.getValue()); |
| + } |
| +} |
| |
| Property changes on: lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java (revision 1583220) |
| +++ lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java (working copy) |
| @@ -30,12 +30,13 @@ |
| public void testOverrides() throws Exception { |
| // verify that all methods of Directory are overridden by FilterDirectory, |
| // except those under the 'exclude' list |
| - Set<String> exclude = new HashSet<>(); |
| - exclude.add("copy"); |
| - exclude.add("createSlicer"); |
| + Set<Method> exclude = new HashSet<>(); |
| + exclude.add(Directory.class.getMethod("copy", Directory.class, String.class, String.class, IOContext.class)); |
| + exclude.add(Directory.class.getMethod("createSlicer", String.class, IOContext.class)); |
| + exclude.add(Directory.class.getMethod("openChecksumInput", String.class, IOContext.class)); |
| for (Method m : FilterDirectory.class.getMethods()) { |
| if (m.getDeclaringClass() == Directory.class) { |
| - assertTrue("method " + m.getName() + " not overridden!", exclude.contains(m.getName())); |
| + assertTrue("method " + m.getName() + " not overridden!", exclude.contains(m)); |
| } |
| } |
| } |
| Index: lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java |
| =================================================================== |
| --- lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java (revision 1583220) |
| +++ lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java (working copy) |
| @@ -167,6 +167,7 @@ |
| @Override public void flush() throws IOException { io.flush(); } |
| @Override public long getFilePointer() { return io.getFilePointer(); } |
| @Override public long length() throws IOException { return io.length(); } |
| + @Override public long getChecksum() throws IOException { return io.getChecksum(); } |
| } |
| |
| } |
| Index: lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java |
| =================================================================== |
| --- lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1583220) |
| +++ lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) |
| @@ -794,6 +794,11 @@ |
| return null; |
| } |
| |
| + @Override |
| + public void validate() throws IOException { |
| + // no-op |
| + } |
| + |
| private class MemoryFields extends Fields { |
| @Override |
| public Iterator<String> iterator() { |
| Index: lucene/misc/build.xml |
| =================================================================== |
| --- lucene/misc/build.xml (revision 1583220) |
| +++ lucene/misc/build.xml (working copy) |
| @@ -44,7 +44,7 @@ |
| <target name="build-native-unix" depends="install-cpptasks"> |
| <mkdir dir="${common.build.dir}/native"/> |
| |
| - <cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" > |
| + <cc outtype="shared" name="c++" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" > |
| <fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" /> |
| <includepath> |
| <pathelement location="${java.home}/../include"/> |
| @@ -54,7 +54,7 @@ |
| </includepath> |
| |
| <compilerarg value="-fPIC" /> |
| - <linkerarg value="-lstdc++" /> |
| + <syslibset libs="stdc++"/> |
| </cc> |
| </target> |
| |
| Index: lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java |
| =================================================================== |
| --- lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java (revision 1583220) |
| +++ lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java (working copy) |
| @@ -240,6 +240,11 @@ |
| public long length() { |
| return fileLength + bufferPos; |
| } |
| + |
| + @Override |
| + public long getChecksum() throws IOException { |
| + throw new UnsupportedOperationException("this directory currently does not work at all!"); |
| + } |
| |
| @Override |
| public void close() throws IOException { |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java (working copy) |
| @@ -306,5 +306,10 @@ |
| public long ramBytesUsed() { |
| return in.ramBytesUsed(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + in.validate(); |
| + } |
| } |
| } |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (working copy) |
| @@ -89,6 +89,11 @@ |
| public long ramBytesUsed() { |
| return in.ramBytesUsed(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + in.validate(); |
| + } |
| } |
| |
| static class AssertingFieldsConsumer extends FieldsConsumer { |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (working copy) |
| @@ -76,6 +76,11 @@ |
| public long ramBytesUsed() { |
| return in.ramBytesUsed(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + in.validate(); |
| + } |
| } |
| |
| enum Status { |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (working copy) |
| @@ -75,6 +75,11 @@ |
| public long ramBytesUsed() { |
| return in.ramBytesUsed(); |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + in.validate(); |
| + } |
| } |
| |
| enum Status { |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (working copy) |
| @@ -46,7 +46,7 @@ |
| import org.apache.lucene.util.packed.PackedInts.FormatAndBits; |
| import org.apache.lucene.util.packed.PackedInts; |
| |
| -import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT; |
| +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_GCD_COMPRESSION; |
| import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BLOCK_SIZE; |
| import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BYTES; |
| import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.NUMBER; |
| @@ -71,10 +71,11 @@ |
| try { |
| String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); |
| data = state.directory.createOutput(dataName, state.context); |
| - CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT); |
| + // this writer writes the format 4.2 did! |
| + CodecUtil.writeHeader(data, dataCodec, VERSION_GCD_COMPRESSION); |
| String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); |
| meta = state.directory.createOutput(metaName, state.context); |
| - CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT); |
| + CodecUtil.writeHeader(meta, metaCodec, VERSION_GCD_COMPRESSION); |
| success = true; |
| } finally { |
| if (!success) { |
| Index: lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (working copy) |
| @@ -94,6 +94,9 @@ |
| } |
| return sizeInBytes; |
| } |
| + |
| + @Override |
| + public void validate() throws IOException {} |
| } |
| |
| static class RAMField extends Terms { |
| Index: lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java (working copy) |
| @@ -166,6 +166,11 @@ |
| } |
| |
| @Override |
| + public long getChecksum() throws IOException { |
| + return delegate.getChecksum(); |
| + } |
| + |
| + @Override |
| public String toString() { |
| return "MockIndexOutputWrapper(" + delegate + ")"; |
| } |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (working copy) |
| @@ -810,6 +810,7 @@ |
| } |
| c.setUseCompoundFile(r.nextBoolean()); |
| c.setReaderPooling(r.nextBoolean()); |
| + c.setValidateAtMerge(r.nextBoolean()); |
| return c; |
| } |
| |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (working copy) |
| @@ -243,6 +243,7 @@ |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); |
| PrintStream infoStream = new PrintStream(bos, false, "UTF-8"); |
| |
| + reader.validate(); |
| FieldNormStatus fieldNormStatus = CheckIndex.testFieldNorms(reader, infoStream); |
| TermIndexStatus termIndexStatus = CheckIndex.testPostings(reader, infoStream); |
| StoredFieldStatus storedFieldStatus = CheckIndex.testStoredFields(reader, infoStream); |
| Index: lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java (revision 1583220) |
| +++ lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java (working copy) |
| @@ -145,4 +145,9 @@ |
| public void copyBytes(DataInput input, long numBytes) throws IOException { |
| delegate.copyBytes(input, numBytes); |
| } |
| + |
| + @Override |
| + public long getChecksum() throws IOException { |
| + return delegate.getChecksum(); |
| + } |
| } |
| Index: solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java |
| =================================================================== |
| --- solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java (revision 1583220) |
| +++ solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java (working copy) |
| @@ -88,5 +88,10 @@ |
| offset += len; |
| } |
| } |
| - |
| + |
| + @Override |
| + public long getChecksum() throws IOException { |
| + flush(); |
| + return dest.getChecksum(); |
| + } |
| } |
| Index: solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java |
| =================================================================== |
| --- solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java (revision 1583220) |
| +++ solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java (working copy) |
| @@ -66,5 +66,9 @@ |
| length = pos; |
| } |
| } |
| - |
| + |
| + @Override |
| + public long getChecksum() throws IOException { |
| + return 0; // we don't write anything. |
| + } |
| } |
| Index: solr/core/src/test/org/apache/solr/search/TestDocSet.java |
| =================================================================== |
| --- solr/core/src/test/org/apache/solr/search/TestDocSet.java (revision 1583220) |
| +++ solr/core/src/test/org/apache/solr/search/TestDocSet.java (working copy) |
| @@ -418,6 +418,10 @@ |
| @Override |
| public void document(int doc, StoredFieldVisitor visitor) { |
| } |
| + |
| + @Override |
| + public void validate() throws IOException { |
| + } |
| }; |
| } |
| |