blob: 9d3d33e19af6d4ec3b3971132d3fc4b5f369b49e [file] [log] [blame]
package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
import java.util.Set;
/**
 * Class responsible for access to stored document fields.
 * <p>
 * It uses the &lt;segment&gt;.fdt (field data) and &lt;segment&gt;.fdx (field index) files.
 *
 * @deprecated Reads the Lucene 3.x stored fields format (see the {@code FORMAT_*} constants);
 *             presumably retained only so that existing 3.x indexes stay readable.
 */
@Deprecated
public final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {

  /** Size in bytes of the format header (a single int) at the start of the index file. */
  private static final int FORMAT_SIZE = 4;

  /** Extension of stored fields file */
  public static final String FIELDS_EXTENSION = "fdt";

  /** Extension of stored fields index file */
  public static final String FIELDS_INDEX_EXTENSION = "fdx";

  // Lucene 3.0: Removal of compressed fields
  static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;

  // Lucene 3.2: NumericFields are stored in binary format
  static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;

  // NOTE: if you introduce a new format, make it 1 higher
  // than the current one, and always change this if you
  // switch to a new format!
  public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;

  // when removing support for old versions, leave the last supported version here
  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;

  // NOTE: bit 0 is free here! You can steal it!
  public static final int FIELD_IS_BINARY = 1 << 1;

  // the old bit 1 << 2 was compressed, is now left out

  private static final int _NUMERIC_BIT_SHIFT = 3;

  /** Mask selecting the three bits (3..5) of the per-field flags byte that encode the numeric type. */
  static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;

  public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
  public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
  public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
  public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;

  private final FieldInfos fieldInfos;
  private final IndexInput fieldsStream;
  private final IndexInput indexStream;

  /** Number of 8-byte entries in the index file (excluding the format header). */
  private int numTotalDocs;

  /** Number of documents served by this reader. */
  private int size;

  private boolean closed;
  private final int format;

  // The docID offset where our docs begin in the index
  // file. This will be 0 if we have our own private file.
  private int docStoreOffset;

  // when we are inside a compound share doc store (CFX),
  // (lucene 3.0 indexes only), we privately open our own fd.
  private final CompoundFileDirectory storeCFSReader;

  /**
   * Returns a cloned FieldsReader that shares open
   * IndexInputs with the original one. It is the caller's
   * job not to close the original FieldsReader until all
   * clones are closed (eg, currently SegmentReader manages
   * this logic).
   * <p>
   * The clone works on clones of both streams and holds no reference to the
   * compound doc store directory.
   *
   * @throws AlreadyClosedException if this FieldsReader is closed
   */
  @Override
  public Lucene3xStoredFieldsReader clone() {
    ensureOpen();
    return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset,
        (IndexInput) fieldsStream.clone(), (IndexInput) indexStream.clone());
  }

  /**
   * Verifies that the code version which wrote the segment is supported.
   * <p>
   * Opens the segment's stored fields index file, reads the leading format int and
   * checks it against {@link #FORMAT_MINIMUM} and {@link #FORMAT_CURRENT}. The file
   * is closed again before returning, whether or not the check succeeds.
   *
   * @param dir directory containing the stored fields index file
   * @param segment name of the segment whose index file is checked
   * @throws IndexFormatTooOldException if the format is lower than {@link #FORMAT_MINIMUM}
   * @throws IndexFormatTooNewException if the format is higher than {@link #FORMAT_CURRENT}
   * @throws IOException if the file cannot be opened or read
   */
  public static void checkCodeVersion(Directory dir, String segment) throws IOException {
    final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
    IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
    try {
      int format = idxStream.readInt();
      if (format < FORMAT_MINIMUM) {
        throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
      }
      if (format > FORMAT_CURRENT) {
        throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
      }
    } finally {
      idxStream.close();
    }
  }

  // Used only by clone
  private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
                                     IndexInput fieldsStream, IndexInput indexStream) {
    this.fieldInfos = fieldInfos;
    this.numTotalDocs = numTotalDocs;
    this.size = size;
    this.format = format;
    this.docStoreOffset = docStoreOffset;
    this.fieldsStream = fieldsStream;
    this.indexStream = indexStream;
    // A clone never owns the compound doc store directory.
    this.storeCFSReader = null;
  }

  /**
   * Opens the stored fields data and index files of a segment.
   * <p>
   * If the segment uses a shared doc store kept in a compound file, a
   * {@link CompoundFileDirectory} is opened over {@code si.dir} and both files are
   * read from it instead of from {@code d}.
   *
   * @param d directory to open the files from (unless the compound doc store is used)
   * @param si segment description supplying the doc store segment name, doc store offset and doc count
   * @param fn field infos used to resolve field numbers while visiting documents
   * @param context IO context passed to every file open
   * @throws IndexFormatTooOldException if the index file format is lower than {@link #FORMAT_MINIMUM}
   * @throws IndexFormatTooNewException if the index file format is higher than {@link #FORMAT_CURRENT}
   * @throws CorruptIndexException if the segment has a private doc store and the doc count derived
   *         from the index file differs from {@code si.docCount}
   * @throws IOException if a file cannot be opened or read
   */
  public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
    final String segment = si.getDocStoreSegment();
    final int docStoreOffset = si.getDocStoreOffset();
    final int size = si.docCount;
    boolean success = false;
    fieldInfos = fn;
    try {
      if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) {
        d = storeCFSReader = new CompoundFileDirectory(si.dir,
            IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
      } else {
        storeCFSReader = null;
      }
      fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
      indexStream = d.openInput(indexStreamFN, context);

      format = indexStream.readInt();

      if (format < FORMAT_MINIMUM) {
        throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
      }
      if (format > FORMAT_CURRENT) {
        throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
      }

      // Everything after the format header is one 8-byte pointer per document.
      final long indexSize = indexStream.length() - FORMAT_SIZE;

      if (docStoreOffset != -1) {
        // We read only a slice out of this shared fields file
        this.docStoreOffset = docStoreOffset;
        this.size = size;

        // Verify the file is long enough to hold all of our
        // docs
        assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
      } else {
        this.docStoreOffset = 0;
        this.size = (int) (indexSize >> 3);
        // Verify two sources of "maxDoc" agree:
        if (this.size != si.docCount) {
          throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
        }
      }
      numTotalDocs = (int) (indexSize >> 3);
      success = true;
    } finally {
      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above. In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        try {
          // Fields that were never opened are still null here; this relies on
          // IOUtils.close accepting null entries.
          close();
        } catch (Throwable ignored) {
          // Deliberately dropped: an exception is already propagating out of the
          // try block, and a failure during cleanup must not replace it.
        }
      }
    }
  }

  /**
   * @throws AlreadyClosedException if this FieldsReader is closed
   */
  private void ensureOpen() throws AlreadyClosedException {
    if (closed) {
      throw new AlreadyClosedException("this FieldsReader is closed");
    }
  }

  /**
   * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams and,
   * if one was opened, the compound doc store directory.
   * This means that the Fields values will not be accessible.
   * Calling this method again after a successful close does nothing.
   *
   * @throws IOException if closing one of the underlying resources fails
   */
  @Override
  public final void close() throws IOException {
    if (!closed) {
      IOUtils.close(fieldsStream, indexStream, storeCFSReader);
      closed = true;
    }
  }

  /**
   * Positions the index stream on the 8-byte entry of the given document.
   *
   * @param docID document number relative to this reader; {@code docStoreOffset} is added to it
   */
  private void seekIndex(int docID) throws IOException {
    // Widen before adding so the sum of the two ints cannot wrap around.
    indexStream.seek(FORMAT_SIZE + ((long) docID + docStoreOffset) * 8L);
  }

  /**
   * Feeds the stored fields of document {@code n} to the given visitor.
   * <p>
   * For every stored field the visitor is asked through
   * {@link StoredFieldVisitor#needsField} whether the value should be read
   * ({@code YES}), skipped ({@code NO}), or whether visiting should end
   * immediately ({@code STOP}).
   *
   * @param n document number relative to this reader
   * @param visitor receiver of the field values
   * @throws CorruptIndexException if a field carries an unknown numeric type
   * @throws IOException if reading from the underlying streams fails
   */
  public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    seekIndex(n);
    // The index entry is the position of the document inside the fields data file.
    fieldsStream.seek(indexStream.readLong());

    final int numFields = fieldsStream.readVInt();
    for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);

      int bits = fieldsStream.readByte() & 0xFF;
      assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);

      switch (visitor.needsField(fieldInfo)) {
        case YES:
          readField(visitor, fieldInfo, bits);
          break;
        case NO:
          skipField(bits);
          break;
        case STOP:
          return;
      }
    }
  }

  /**
   * Reads the value of the current field from the fields stream and hands it to the visitor.
   * <p>
   * Numeric values are read as a raw int or long (floats and doubles are rebuilt
   * from their bit patterns). Any other value is read as a VInt length followed by
   * that many bytes, delivered either as binary or as a UTF-8 decoded string.
   *
   * @param visitor receiver of the value
   * @param info field the value belongs to
   * @param bits flags byte of the field
   * @throws CorruptIndexException if the numeric type encoded in {@code bits} is unknown
   */
  private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
    if (numeric != 0) {
      switch (numeric) {
        case FIELD_IS_NUMERIC_INT:
          visitor.intField(info, fieldsStream.readInt());
          return;
        case FIELD_IS_NUMERIC_LONG:
          visitor.longField(info, fieldsStream.readLong());
          return;
        case FIELD_IS_NUMERIC_FLOAT:
          visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
          return;
        case FIELD_IS_NUMERIC_DOUBLE:
          visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
          return;
        default:
          throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
      }
    } else {
      final int length = fieldsStream.readVInt();
      byte[] bytes = new byte[length];
      fieldsStream.readBytes(bytes, 0, length);
      if ((bits & FIELD_IS_BINARY) != 0) {
        visitor.binaryField(info, bytes, 0, bytes.length);
      } else {
        visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
      }
    }
  }

  /**
   * Moves the fields stream past the value of the current field without delivering it.
   *
   * @param bits flags byte of the field
   * @throws CorruptIndexException if the numeric type encoded in {@code bits} is unknown
   */
  private void skipField(int bits) throws IOException {
    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
    if (numeric != 0) {
      switch (numeric) {
        case FIELD_IS_NUMERIC_INT:
        case FIELD_IS_NUMERIC_FLOAT:
          // ints and floats both occupy one int
          fieldsStream.readInt();
          return;
        case FIELD_IS_NUMERIC_LONG:
        case FIELD_IS_NUMERIC_DOUBLE:
          // longs and doubles both occupy one long
          fieldsStream.readLong();
          return;
        default:
          throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
      }
    } else {
      // Length-prefixed value: jump over the payload instead of reading it.
      final int length = fieldsStream.readVInt();
      fieldsStream.seek(fieldsStream.getFilePointer() + length);
    }
  }

  // note: if there are shared docstores, we are also called by Lucene3xCodec even in
  // the CFS case. so logic here must handle this.
  /**
   * Adds the names of the stored fields files of a segment to {@code files}.
   * <ul>
   *   <li>Shared doc store in a compound file: the compound doc store file only.</li>
   *   <li>Shared doc store, not compound: the index and data files named after the doc store segment.</li>
   *   <li>Private doc store and the segment is not a compound file: the index and data files
   *       named after the segment itself.</li>
   *   <li>Otherwise nothing is added.</li>
   * </ul>
   *
   * @param info segment whose files are collected
   * @param files set receiving the file names
   */
  public static void files(SegmentInfo info, Set<String> files) throws IOException {
    if (info.getDocStoreOffset() != -1) {
      assert info.getDocStoreSegment() != null;
      if (info.getDocStoreIsCompoundFile()) {
        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
      } else {
        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
      }
    } else if (!info.getUseCompoundFile()) {
      files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
      files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
    }
  }
}