blob: cde000bc265ed747ea52ce7749776e5754ebb14b [file] [log] [blame]
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
index 240d16d..32b03e5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CodecUtil;
/**
* Lucene 4.0 Stored Fields Format.
@@ -42,7 +43,8 @@ import org.apache.lucene.store.IOContext;
* <p>This contains, for each document, a pointer to its field data, as
* follows:</p>
* <ul>
- * <li>FieldIndex (.fdx) --&gt; &lt;FieldValuesPosition&gt; <sup>SegSize</sup></li>
+ * <li>FieldIndex (.fdx) --&gt; &lt;Header&gt;, &lt;FieldValuesPosition&gt; <sup>SegSize</sup></li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>FieldValuesPosition --&gt; {@link DataOutput#writeLong Uint64}</li>
* </ul>
* </li>
@@ -50,7 +52,8 @@ import org.apache.lucene.store.IOContext;
* <p><a name="field_data" id="field_data"></a>The field data, or <tt>.fdt</tt> file.</p>
* <p>This contains the stored fields of each document, as follows:</p>
* <ul>
- * <li>FieldData (.fdt) --&gt; &lt;DocFieldData&gt; <sup>SegSize</sup></li>
+ * <li>FieldData (.fdt) --&gt; &lt;Header&gt;, &lt;DocFieldData&gt; <sup>SegSize</sup></li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DocFieldData --&gt; FieldCount, &lt;FieldNum, Bits, Value&gt;
* <sup>FieldCount</sup></li>
* <li>FieldCount --&gt; {@link DataOutput#writeVInt VInt}</li>
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
index ab89821..99cfe4f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
@@ -30,11 +30,14 @@ import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
import java.util.Set;
+import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*;
+
/**
* Class responsible for access to stored document fields.
* <p/>
@@ -44,8 +47,6 @@ import java.util.Set;
* @lucene.internal
*/
public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
- private final static int FORMAT_SIZE = 4;
-
private final FieldInfos fieldInfos;
private final IndexInput fieldsStream;
private final IndexInput indexStream;
@@ -78,17 +79,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
boolean success = false;
fieldInfos = fn;
try {
- fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
- final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
+ fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
+ final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
- // its a 4.0 codec: so its not too-old, its corrupt.
- // TODO: change this to CodecUtil.checkHeader
- if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
- throw new CorruptIndexException("unexpected fdx header: " + indexStream);
- }
-
- final long indexSize = indexStream.length() - FORMAT_SIZE;
+ CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
+ CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
+ assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
+ assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
+ final long indexSize = indexStream.length() - HEADER_LENGTH_IDX;
this.size = (int) (indexSize >> 3);
// Verify two sources of "maxDoc" agree:
if (this.size != si.docCount) {
@@ -135,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
}
private void seekIndex(int docID) throws IOException {
- indexStream.seek(FORMAT_SIZE + docID * 8L);
+ indexStream.seek(HEADER_LENGTH_IDX + docID * 8L);
}
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
@@ -148,7 +147,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
int bits = fieldsStream.readByte() & 0xFF;
- assert bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
+ assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
switch(visitor.needsField(fieldInfo)) {
case YES:
@@ -164,19 +163,19 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
}
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
- final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
+ final int numeric = bits & FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
switch(numeric) {
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FIELD_IS_NUMERIC_INT:
visitor.intField(info, fieldsStream.readInt());
return;
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FIELD_IS_NUMERIC_LONG:
visitor.longField(info, fieldsStream.readLong());
return;
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ case FIELD_IS_NUMERIC_FLOAT:
visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
return;
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ case FIELD_IS_NUMERIC_DOUBLE:
visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
return;
default:
@@ -186,7 +185,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
final int length = fieldsStream.readVInt();
byte bytes[] = new byte[length];
fieldsStream.readBytes(bytes, 0, length);
- if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
+ if ((bits & FIELD_IS_BINARY) != 0) {
visitor.binaryField(info, bytes, 0, bytes.length);
} else {
visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
@@ -195,15 +194,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
}
private void skipField(int bits) throws IOException {
- final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
+ final int numeric = bits & FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
switch(numeric) {
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ case FIELD_IS_NUMERIC_INT:
+ case FIELD_IS_NUMERIC_FLOAT:
fieldsStream.readInt();
return;
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
- case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ case FIELD_IS_NUMERIC_LONG:
+ case FIELD_IS_NUMERIC_DOUBLE:
fieldsStream.readLong();
return;
default:
@@ -242,7 +241,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
}
public static void files(SegmentInfo info, Set<String> files) throws IOException {
- files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
- files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
index c236d9c..15f2ea5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
@@ -34,6 +34,7 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/**
@@ -62,16 +63,14 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
// currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
// currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
- // (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format
- static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+ static final String CODEC_NAME_IDX = "Lucene40StoredFieldsIndex";
+ static final String CODEC_NAME_DAT = "Lucene40StoredFieldsData";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+ static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
+ static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
- // NOTE: if you introduce a new format, make it 1 higher
- // than the current one, and always change this if you
- // switch to a new format!
- static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
- // when removing support for old versions, leave the last supported version here
- static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";
@@ -94,9 +93,10 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);
- fieldsStream.writeInt(FORMAT_CURRENT);
- indexStream.writeInt(FORMAT_CURRENT);
-
+ CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
+ CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
+ assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
+ assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
success = true;
} finally {
if (!success) {
@@ -209,7 +209,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
@Override
public void finish(int numDocs) throws IOException {
- if (4+((long) numDocs)*8 != indexStream.getFilePointer())
+ if (HEADER_LENGTH_IDX+((long) numDocs)*8 != indexStream.getFilePointer())
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck, here, and
// throw an exception to prevent the corruption from
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
index b7fc812..7f39676 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CodecUtil;
/**
* Lucene 4.0 Term Vectors format.
@@ -38,10 +39,10 @@ import org.apache.lucene.store.IOContext;
* <p>The Document Index or .tvx file.</p>
* <p>For each document, this stores the offset into the document data (.tvd) and
* field data (.tvf) files.</p>
- * <p>DocumentIndex (.tvx) --&gt; TVXVersion&lt;DocumentPosition,FieldPosition&gt;
+ * <p>DocumentIndex (.tvx) --&gt; Header,&lt;DocumentPosition,FieldPosition&gt;
* <sup>NumDocs</sup></p>
* <ul>
- * <li>TVXVersion --&gt; {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DocumentPosition --&gt; {@link DataOutput#writeLong UInt64} (offset in the .tvd file)</li>
* <li>FieldPosition --&gt; {@link DataOutput#writeLong UInt64} (offset in the .tvf file)</li>
* </ul>
@@ -53,10 +54,10 @@ import org.apache.lucene.store.IOContext;
* in the .tvf (Term Vector Fields) file.</p>
* <p>The .tvd file is used to map out the fields that have term vectors stored
* and where the field information is in the .tvf file.</p>
- * <p>Document (.tvd) --&gt; TVDVersion&lt;NumFields, FieldNums,
+ * <p>Document (.tvd) --&gt; Header,&lt;NumFields, FieldNums,
* FieldPositions&gt; <sup>NumDocs</sup></p>
* <ul>
- * <li>TVDVersion --&gt; {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>NumFields --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldNums --&gt; &lt;FieldNumDelta&gt; <sup>NumFields</sup></li>
* <li>FieldNumDelta --&gt; {@link DataOutput#writeVInt VInt}</li>
@@ -69,10 +70,10 @@ import org.apache.lucene.store.IOContext;
* <p>This file contains, for each field that has a term vector stored, a list of
* the terms, their frequencies and, optionally, position and offset
* information.</p>
- * <p>Field (.tvf) --&gt; TVFVersion&lt;NumTerms, Position/Offset, TermFreqs&gt;
+ * <p>Field (.tvf) --&gt; Header,&lt;NumTerms, Position/Offset, TermFreqs&gt;
* <sup>NumFields</sup></p>
* <ul>
- * <li>TVFVersion --&gt; {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>NumTerms --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Position/Offset --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>TermFreqs --&gt; &lt;TermText, TermFreq, Positions?, Offsets?&gt;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
index e44713b..c0bee03 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
@@ -33,8 +33,6 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFormatTooNewException;
-import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -43,8 +41,10 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
+
/**
* Lucene 4.0 Term Vectors reader.
* <p>
@@ -54,22 +54,6 @@ import org.apache.lucene.util.IOUtils;
*/
public class Lucene40TermVectorsReader extends TermVectorsReader {
- // NOTE: if you make a new format, it must be larger than
- // the current format
-
- // Changed strings to UTF8 with length-in-bytes not length-in-chars
- static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
-
- // NOTE: always change this if you switch to a new format!
- // whenever you add a new format, make it 1 larger (positive version logic)!
- static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
-
- // when removing support for old versions, leave the last supported version here
- static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES;
-
- //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
- static final int FORMAT_SIZE = 4;
-
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
@@ -82,6 +66,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
/** Extension of vectors index file */
static final String VECTORS_INDEX_EXTENSION = "tvx";
+
+ static final String CODEC_NAME_FIELDS = "Lucene40TermVectorsFields";
+ static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
+ static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
+
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
+ static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
+ static final long HEADER_LENGTH_INDEX = CodecUtil.headerLength(CODEC_NAME_INDEX);
private FieldInfos fieldInfos;
@@ -91,17 +86,15 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private int size;
private int numTotalDocs;
- private final int format;
// used by clone
- Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int format) {
+ Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs) {
this.fieldInfos = fieldInfos;
this.tvx = tvx;
this.tvd = tvd;
this.tvf = tvf;
this.size = size;
this.numTotalDocs = numTotalDocs;
- this.format = format;
}
public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
@@ -114,18 +107,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
try {
String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
tvx = d.openInput(idxName, context);
- format = checkValidFormat(tvx);
+ final int tvxVersion = CodecUtil.checkHeader(tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);
+
String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
tvd = d.openInput(fn, context);
- final int tvdFormat = checkValidFormat(tvd);
+ final int tvdVersion = CodecUtil.checkHeader(tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);
fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
tvf = d.openInput(fn, context);
- final int tvfFormat = checkValidFormat(tvf);
+ final int tvfVersion = CodecUtil.checkHeader(tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);
+ assert HEADER_LENGTH_INDEX == tvx.getFilePointer();
+ assert HEADER_LENGTH_DOCS == tvd.getFilePointer();
+ assert HEADER_LENGTH_FIELDS == tvf.getFilePointer();
+ assert tvxVersion == tvdVersion;
+ assert tvxVersion == tvfVersion;
- assert format == tvdFormat;
- assert format == tvfFormat;
-
- numTotalDocs = (int) (tvx.length() >> 4);
+ numTotalDocs = (int) (tvx.length()-HEADER_LENGTH_INDEX >> 4);
this.size = numTotalDocs;
assert size == 0 || numTotalDocs == size;
@@ -156,13 +152,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
// Not private to avoid synthetic access$NNN methods
void seekTvx(final int docNum) throws IOException {
- tvx.seek(docNum * 16L + FORMAT_SIZE);
- }
-
- boolean canReadRawDocs() {
- // we can always read raw docs, unless the term vectors
- // didn't exist
- return format != 0;
+ tvx.seek(docNum * 16L + HEADER_LENGTH_INDEX);
}
/** Retrieve the length (in bytes) of the tvd and tvf
@@ -210,16 +200,6 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
}
}
- private int checkValidFormat(IndexInput in) throws CorruptIndexException, IOException
- {
- int format = in.readInt();
- if (format < FORMAT_MINIMUM)
- throw new IndexFormatTooOldException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
- if (format > FORMAT_CURRENT)
- throw new IndexFormatTooNewException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
- return format;
- }
-
public void close() throws IOException {
IOUtils.close(tvx, tvd, tvf);
}
@@ -708,7 +688,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
cloneTvf = (IndexInput) tvf.clone();
}
- return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, format);
+ return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs);
}
public static void files(SegmentInfo info, Set<String> files) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
index 372db23..a61c321 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
@@ -35,9 +35,13 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
+import static org.apache.lucene.codecs.lucene40.Lucene40TermVectorsReader.*;
+
+
// TODO: make a new 4.0 TV format that encodes better
// - use startOffset (not endOffset) as base for delta on
// next startOffset because today for syns or ngrams or
@@ -58,6 +62,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
private final Directory directory;
private final String segment;
private IndexOutput tvx = null, tvd = null, tvf = null;
+
+
public Lucene40TermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
this.directory = directory;
@@ -66,11 +72,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
try {
// Open files for TermVector storage
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION), context);
- tvx.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+ CodecUtil.writeHeader(tvx, CODEC_NAME_INDEX, VERSION_CURRENT);
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
- tvd.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+ CodecUtil.writeHeader(tvd, CODEC_NAME_DOCS, VERSION_CURRENT);
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
- tvf.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+ CodecUtil.writeHeader(tvf, CODEC_NAME_FIELDS, VERSION_CURRENT);
+ assert HEADER_LENGTH_INDEX == tvx.getFilePointer();
+ assert HEADER_LENGTH_DOCS == tvd.getFilePointer();
+ assert HEADER_LENGTH_FIELDS == tvf.getFilePointer();
success = true;
} finally {
if (!success) {
@@ -252,10 +261,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
- // If the TV* files are an older format then they cannot read raw docs:
- if (((Lucene40TermVectorsReader)vectorsReader).canReadRawDocs()) {
matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
- }
}
}
if (reader.liveDocs != null) {
@@ -356,7 +362,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
@Override
public void finish(int numDocs) throws IOException {
- if (4+((long) numDocs)*16 != tvx.getFilePointer())
+ if (HEADER_LENGTH_INDEX+((long) numDocs)*16 != tvx.getFilePointer())
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck, here, and
// throw an exception to prevent the corruption from
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
index 4ab1bd9..7fb3dd9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
@@ -236,27 +236,34 @@ public final class Bytes {
private IndexOutput datOut;
protected BytesRef bytesRef = new BytesRef();
private final Directory dir;
- private final String codecName;
+ private final String codecNameIdx;
+ private final String codecNameDat;
private final int version;
private final IOContext context;
- protected BytesWriterBase(Directory dir, String id, String codecName,
+ protected BytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
super(bytesUsed, type);
this.id = id;
this.dir = dir;
- this.codecName = codecName;
+ this.codecNameIdx = codecNameIdx;
+ this.codecNameDat = codecNameDat;
this.version = version;
this.context = context;
+ assert codecNameDat != null || codecNameIdx != null: "both codec names are null";
+ assert (codecNameDat != null && !codecNameDat.equals(codecNameIdx))
+ || (codecNameIdx != null && !codecNameIdx.equals(codecNameDat)):
+ "index and data codec names must not be equal";
}
protected IndexOutput getOrCreateDataOut() throws IOException {
if (datOut == null) {
boolean success = false;
+ assert codecNameDat != null;
try {
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
DocValuesWriterBase.DATA_EXTENSION), context);
- CodecUtil.writeHeader(datOut, codecName, version);
+ CodecUtil.writeHeader(datOut, codecNameDat, version);
success = true;
} finally {
if (!success) {
@@ -279,9 +286,10 @@ public final class Bytes {
boolean success = false;
try {
if (idxOut == null) {
+ assert codecNameIdx != null;
idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
DocValuesWriterBase.INDEX_EXTENSION), context);
- CodecUtil.writeHeader(idxOut, codecName, version);
+ CodecUtil.writeHeader(idxOut, codecNameIdx, version);
}
success = true;
} finally {
@@ -308,8 +316,8 @@ public final class Bytes {
protected final int version;
protected final String id;
protected final Type type;
-
- protected BytesReaderBase(Directory dir, String id, String codecName,
+
+ protected BytesReaderBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int maxVersion, boolean doIndex, IOContext context, Type type) throws IOException {
IndexInput dataIn = null;
IndexInput indexIn = null;
@@ -317,11 +325,11 @@ public final class Bytes {
try {
dataIn = dir.openInput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
DocValuesWriterBase.DATA_EXTENSION), context);
- version = CodecUtil.checkHeader(dataIn, codecName, maxVersion, maxVersion);
+ version = CodecUtil.checkHeader(dataIn, codecNameDat, maxVersion, maxVersion);
if (doIndex) {
indexIn = dir.openInput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
DocValuesWriterBase.INDEX_EXTENSION), context);
- final int version2 = CodecUtil.checkHeader(indexIn, codecName,
+ final int version2 = CodecUtil.checkHeader(indexIn, codecNameIdx,
maxVersion, maxVersion);
assert version == version2;
}
@@ -377,23 +385,23 @@ public final class Bytes {
protected final boolean fasterButMoreRam;
protected long maxBytes = 0;
- protected DerefBytesWriterBase(Directory dir, String id, String codecName,
+ protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int codecVersion, Counter bytesUsed, IOContext context, Type type)
throws IOException {
- this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
+ this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
}
- protected DerefBytesWriterBase(Directory dir, String id, String codecName,
+ protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
throws IOException {
- this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
+ this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
}
- protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
+ protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
- super(dir, id, codecName, codecVersion, bytesUsed, context, type);
+ super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
index 7c745b9..c7e8740 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
@@ -39,14 +39,16 @@ import org.apache.lucene.util.packed.PackedInts;
*/
class FixedDerefBytesImpl {
- static final String CODEC_NAME = "FixedDerefBytes";
+ static final String CODEC_NAME_IDX = "FixedDerefBytesIdx";
+ static final String CODEC_NAME_DAT = "FixedDerefBytesDat";
+
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
public static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
}
@Override
@@ -71,7 +73,7 @@ class FixedDerefBytesImpl {
private final int size;
private final int numValuesStored;
FixedDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_FIXED_DEREF);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_FIXED_DEREF);
size = datIn.readInt();
numValuesStored = idxIn.readInt();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
index 2ab1700..278cb89 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
@@ -49,7 +49,8 @@ import org.apache.lucene.util.packed.PackedInts;
*/
class FixedSortedBytesImpl {
- static final String CODEC_NAME = "FixedSortedBytes";
+ static final String CODEC_NAME_IDX = "FixedSortedBytesIdx";
+ static final String CODEC_NAME_DAT = "FixedSortedBytesDat";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
@@ -58,7 +59,7 @@ class FixedSortedBytesImpl {
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
this.comp = comp;
}
@@ -127,7 +128,7 @@ class FixedSortedBytesImpl {
public Reader(Directory dir, String id, int maxDoc, IOContext context,
Type type, Comparator<BytesRef> comparator) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
size = datIn.readInt();
valueCount = idxIn.readInt();
this.comparator = comparator;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
index fd779ae..ced34f3 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
@@ -61,14 +61,14 @@ class FixedStraightBytesImpl {
private final int byteBlockSize = BYTE_BLOCK_SIZE;
private final ByteBlockPool pool;
- protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+ protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
int version, Counter bytesUsed, IOContext context) throws IOException {
- this(dir, id, codecName, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
+ this(dir, id, codecNameDat, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
}
- protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+ protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
- super(dir, id, codecName, version, bytesUsed, context, type);
+ super(dir, id, null, codecNameDat, version, bytesUsed, context, type);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
pool.nextBuffer();
}
@@ -139,8 +139,8 @@ class FixedStraightBytesImpl {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
}
- public Writer(Directory dir, String id, String codecName, int version, Counter bytesUsed, IOContext context) throws IOException {
- super(dir, id, codecName, version, bytesUsed, context);
+ public Writer(Directory dir, String id, String codecNameDat, int version, Counter bytesUsed, IOContext context) throws IOException {
+ super(dir, id, codecNameDat, version, bytesUsed, context);
}
@@ -268,8 +268,8 @@ class FixedStraightBytesImpl {
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, Type.BYTES_FIXED_STRAIGHT);
}
- protected FixedStraightReader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context, Type type) throws IOException {
- super(dir, id, codec, version, false, context, type);
+ protected FixedStraightReader(Directory dir, String id, String codecNameDat, int version, int maxDoc, IOContext context, Type type) throws IOException {
+ super(dir, id, null, codecNameDat, version, false, context, type);
size = datIn.readInt();
this.maxDoc = maxDoc;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
index 43bff79..fa46bf6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
@@ -41,7 +41,9 @@ import org.apache.lucene.util.packed.PackedInts;
*/
class VarDerefBytesImpl {
- static final String CODEC_NAME = "VarDerefBytes";
+ static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
+ static final String CODEC_NAME_DAT = "VarDerefBytesDat";
+
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
@@ -57,7 +59,7 @@ class VarDerefBytesImpl {
static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
size = 0;
}
@@ -93,7 +95,7 @@ class VarDerefBytesImpl {
public static class VarDerefReader extends BytesReaderBase {
private final long totalBytes;
VarDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_DEREF);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_VAR_DEREF);
totalBytes = idxIn.readLong();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
index 9a8e87d..87c3f65 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
@@ -50,7 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
*/
final class VarSortedBytesImpl {
- static final String CODEC_NAME = "VarDerefBytes";
+ static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
+ static final String CODEC_NAME_DAT = "VarDerefBytesDat";
+
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
@@ -59,7 +61,7 @@ final class VarSortedBytesImpl {
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
this.comp = comp;
size = 0;
}
@@ -154,7 +156,7 @@ final class VarSortedBytesImpl {
Reader(Directory dir, String id, int maxDoc,
IOContext context, Type type, Comparator<BytesRef> comparator)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
this.comparator = comparator;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
index cfb9d78..ba18691 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
@@ -50,7 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
*/
class VarStraightBytesImpl {
- static final String CODEC_NAME = "VarStraightBytes";
+ static final String CODEC_NAME_IDX = "VarStraightBytesIdx";
+ static final String CODEC_NAME_DAT = "VarStraightBytesDat";
+
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
@@ -64,7 +66,7 @@ class VarStraightBytesImpl {
private boolean merge = false;
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
docToAddress = new long[1];
pool.nextBuffer(); // init
@@ -236,7 +238,7 @@ class VarStraightBytesImpl {
final int maxDoc;
VarStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT);
+ super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT);
this.maxDoc = maxDoc;
}