blob: ad48f0390200f1b6a6b96324b3eaaa89ec0ac611 [file] [log] [blame]
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Collection;
import java.util.Comparator;
import java.io.IOException;
import org.apache.lucene.index.IndexFileNames;
/** @lucene.experimental */
public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
// NOTE: long is overkill here, since this number is 128
// by default and only indexDivisor * 128 if you change
// the indexDivisor at search time. But, we use this in a
// number of places to multiply out the actual ord, and we
// will overflow int during those multiplies. So to avoid
// having to upgrade each multiple to long in multiple
// places (error prone), we use long here:
private long totalIndexInterval;
private int indexDivisor;
final private int indexInterval;
// Closed if indexLoaded is true:
private IndexInput in;
private volatile boolean indexLoaded;
private final Comparator<BytesRef> termComp;
private final static int PAGED_BYTES_BITS = 15;
// all fields share this single logical byte[]
private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
private PagedBytes.Reader termBytesReader;
final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
// start of the field info data
protected long dirOffset;
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp, int codecId)
throws IOException {
this.termComp = termComp;
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
boolean success = false;
try {
readHeader(in);
indexInterval = in.readInt();
this.indexDivisor = indexDivisor;
if (indexDivisor < 0) {
totalIndexInterval = indexInterval;
} else {
// In case terms index gets loaded, later, on demand
totalIndexInterval = indexInterval * indexDivisor;
}
assert totalIndexInterval > 0;
seekDir(in, dirOffset);
// Read directory
final int numFields = in.readVInt();
//System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
final int numIndexTerms = in.readVInt();
final long termsStart = in.readVLong();
final long indexStart = in.readVLong();
final long packedIndexStart = in.readVLong();
final long packedOffsetsStart = in.readVLong();
assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
}
success = true;
} finally {
if (!success) IOUtils.closeSafely(true, in);
if (indexDivisor > 0) {
in.close();
in = null;
if (success) {
indexLoaded = true;
}
termBytesReader = termBytes.freeze(true);
}
}
}
@Override
public int getDivisor() {
return indexDivisor;
}
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_START);
dirOffset = input.readLong();
}
private class IndexEnum extends FieldIndexEnum {
private final FieldIndexData.CoreFieldIndex fieldIndex;
private final BytesRef term = new BytesRef();
private long ord;
public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
this.fieldIndex = fieldIndex;
}
@Override
public BytesRef term() {
return term;
}
@Override
public long seek(BytesRef target) {
int lo = 0; // binary search
int hi = fieldIndex.numIndexTerms - 1;
assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
while (hi >= lo) {
int mid = (lo + hi) >>> 1;
final long offset = fieldIndex.termOffsets.get(mid);
final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset);
termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
int delta = termComp.compare(target, term);
if (delta < 0) {
hi = mid - 1;
} else if (delta > 0) {
lo = mid + 1;
} else {
assert mid >= 0;
ord = mid*totalIndexInterval;
return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
}
}
if (hi < 0) {
assert hi == -1;
hi = 0;
}
final long offset = fieldIndex.termOffsets.get(hi);
final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset);
termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
ord = hi*totalIndexInterval;
return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
}
@Override
public long next() {
final int idx = 1 + (int) (ord / totalIndexInterval);
if (idx >= fieldIndex.numIndexTerms) {
return -1;
}
ord += totalIndexInterval;
final long offset = fieldIndex.termOffsets.get(idx);
final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
}
@Override
public long ord() {
return ord;
}
@Override
public long seek(long ord) {
int idx = (int) (ord / totalIndexInterval);
// caller must ensure ord is in bounds
assert idx < fieldIndex.numIndexTerms;
final long offset = fieldIndex.termOffsets.get(idx);
final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
this.ord = idx * totalIndexInterval;
return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
}
}
@Override
public boolean supportsOrd() {
return true;
}
private final class FieldIndexData {
final private FieldInfo fieldInfo;
volatile CoreFieldIndex coreIndex;
private final long indexStart;
private final long termsStart;
private final long packedIndexStart;
private final long packedOffsetsStart;
private final int numIndexTerms;
public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
long packedOffsetsStart) throws IOException {
this.fieldInfo = fieldInfo;
this.termsStart = termsStart;
this.indexStart = indexStart;
this.packedIndexStart = packedIndexStart;
this.packedOffsetsStart = packedOffsetsStart;
this.numIndexTerms = numIndexTerms;
if (indexDivisor > 0) {
loadTermsIndex();
}
}
public void loadTermsIndex() throws IOException {
if (coreIndex == null) {
coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
}
}
private final class CoreFieldIndex {
// where this field's terms begin in the packed byte[]
// data
final long termBytesStart;
// offset into index termBytes
final PackedInts.Reader termOffsets;
// index pointers into main terms dict
final PackedInts.Reader termsDictOffsets;
final int numIndexTerms;
final long termsStart;
public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
this.termsStart = termsStart;
termBytesStart = termBytes.getPointer();
IndexInput clone = (IndexInput) in.clone();
clone.seek(indexStart);
// -1 is passed to mean "don't load term index", but
// if we are then later loaded it's overwritten with
// a real value
assert indexDivisor > 0;
this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
assert this.numIndexTerms > 0: "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor;
if (indexDivisor == 1) {
// Default (load all index terms) is fast -- slurp in the images from disk:
try {
final long numTermBytes = packedIndexStart - indexStart;
termBytes.copy(clone, numTermBytes);
// records offsets into main terms dict file
termsDictOffsets = PackedInts.getReader(clone);
assert termsDictOffsets.size() == numIndexTerms;
// records offsets into byte[] term data
termOffsets = PackedInts.getReader(clone);
assert termOffsets.size() == 1+numIndexTerms;
} finally {
clone.close();
}
} else {
// Get packed iterators
final IndexInput clone1 = (IndexInput) in.clone();
final IndexInput clone2 = (IndexInput) in.clone();
try {
// Subsample the index terms
clone1.seek(packedIndexStart);
final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
clone2.seek(packedOffsetsStart);
final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
// TODO: often we can get by w/ fewer bits per
// value, below.. .but this'd be more complex:
// we'd have to try @ fewer bits and then grow
// if we overflowed it.
PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue());
PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue());
termsDictOffsets = termsDictOffsetsM;
termOffsets = termOffsetsM;
int upto = 0;
long termOffsetUpto = 0;
while(upto < this.numIndexTerms) {
// main file offset copies straight over
termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
termOffsetsM.set(upto, termOffsetUpto);
long termOffset = termOffsetsIter.next();
long nextTermOffset = termOffsetsIter.next();
final int numTermBytes = (int) (nextTermOffset - termOffset);
clone.seek(indexStart + termOffset);
assert indexStart + termOffset < clone.length() : "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.length();
assert indexStart + termOffset + numTermBytes < clone.length();
termBytes.copy(clone, numTermBytes);
termOffsetUpto += numTermBytes;
upto++;
if (upto == this.numIndexTerms) {
break;
}
// skip terms:
termsDictOffsetsIter.next();
for(int i=0;i<indexDivisor-2;i++) {
termOffsetsIter.next();
termsDictOffsetsIter.next();
}
}
termOffsetsM.set(upto, termOffsetUpto);
} finally {
clone1.close();
clone2.close();
clone.close();
}
}
}
}
}
// Externally synced in IndexWriter
@Override
public void loadTermsIndex(int indexDivisor) throws IOException {
if (!indexLoaded) {
if (indexDivisor < 0) {
this.indexDivisor = -indexDivisor;
} else {
this.indexDivisor = indexDivisor;
}
this.totalIndexInterval = indexInterval * this.indexDivisor;
Iterator<FieldIndexData> it = fields.values().iterator();
while(it.hasNext()) {
it.next().loadTermsIndex();
}
indexLoaded = true;
in.close();
termBytesReader = termBytes.freeze(true);
}
}
@Override
public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
final FieldIndexData fieldData = fields.get(fieldInfo);
if (fieldData.coreIndex == null) {
return null;
} else {
return new IndexEnum(fieldData.coreIndex);
}
}
public static void files(Directory dir, SegmentInfo info, int id, Collection<String> files) {
files.add(IndexFileNames.segmentFileName(info.name, id, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
}
public static void getIndexExtensions(Collection<String> extensions) {
extensions.add(FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
}
@Override
public void getExtensions(Collection<String> extensions) {
getIndexExtensions(extensions);
}
@Override
public void close() throws IOException {
if (in != null && !indexLoaded) {
in.close();
}
if (termBytesReader != null) {
termBytesReader.close();
}
}
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
input.seek(dirOffset);
}
}