blob: cead17cb8573a1a4e76420cc2b23620a71e68afe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.MathUtil;
/**
* This abstract class reads skip lists with multiple levels.
*
* See {@link MultiLevelSkipListWriter} for the information about the encoding
* of the multi level skip lists.
*
* Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
* which defines the actual format of the skip data.
* @lucene.experimental
*/
public abstract class MultiLevelSkipListReader implements Closeable {
/** the maximum number of skip levels possible for this index */
protected int maxNumberOfSkipLevels;
/** number of levels in this skip list */
protected int numberOfSkipLevels;
// Expert: defines the number of top skip levels to buffer in memory.
// Reducing this number results in less memory usage, but possibly
// slower performance due to more random I/Os.
// Please notice that the space each level occupies is limited by
// the skipInterval. The top level can not contain more than
// skipLevel entries, the second top level can not contain more
// than skipLevel^2 entries and so forth.
private int numberOfLevelsToBuffer = 1;
private int docCount;
/** skipStream for each level. */
private IndexInput[] skipStream;
/** The start pointer of each skip level. */
private long skipPointer[];
/** skipInterval of each level. */
private int skipInterval[];
/** Number of docs skipped per level.
* It's possible for some values to overflow a signed int, but this has been accounted for.
*/
private int[] numSkipped;
/** Doc id of current skip entry per level. */
protected int[] skipDoc;
/** Doc id of last read skip entry with docId <= target. */
private int lastDoc;
/** Child pointer of current skip entry per level. */
private long[] childPointer;
/** childPointer of last read skip entry with docId <=
* target. */
private long lastChildPointer;
private boolean inputIsBuffered;
private final int skipMultiplier;
/** Creates a {@code MultiLevelSkipListReader}. */
protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
this.skipStream = new IndexInput[maxSkipLevels];
this.skipPointer = new long[maxSkipLevels];
this.childPointer = new long[maxSkipLevels];
this.numSkipped = new int[maxSkipLevels];
this.maxNumberOfSkipLevels = maxSkipLevels;
this.skipInterval = new int[maxSkipLevels];
this.skipMultiplier = skipMultiplier;
this.skipStream [0]= skipStream;
this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
this.skipInterval[0] = skipInterval;
for (int i = 1; i < maxSkipLevels; i++) {
// cache skip intervals
this.skipInterval[i] = this.skipInterval[i - 1] * skipMultiplier;
}
skipDoc = new int[maxSkipLevels];
}
/** Creates a {@code MultiLevelSkipListReader}, where
* {@code skipInterval} and {@code skipMultiplier} are
* the same. */
protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
this(skipStream, maxSkipLevels, skipInterval, skipInterval);
}
/** Returns the id of the doc to which the last call of {@link #skipTo(int)}
* has skipped. */
public int getDoc() {
return lastDoc;
}
/** Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. Returns the current doc count.
*/
public int skipTo(int target) throws IOException {
// walk up the levels until highest level is found that has a skip
// for this target
int level = 0;
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
level++;
}
while (level >= 0) {
if (target > skipDoc[level]) {
if (!loadNextSkip(level)) {
continue;
}
} else {
// no more skips on this level, go down one level
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
seekChild(level - 1);
}
level--;
}
}
return numSkipped[0] - skipInterval[0] - 1;
}
private boolean loadNextSkip(int level) throws IOException {
// we have to skip, the target document is greater than the current
// skip list entry
setLastSkipData(level);
numSkipped[level] += skipInterval[level];
// numSkipped may overflow a signed int, so compare as unsigned.
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
// this skip list is exhausted
skipDoc[level] = Integer.MAX_VALUE;
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
return false;
}
// read next skip entry
skipDoc[level] += readSkipData(level, skipStream[level]);
if (level != 0) {
// read the child pointer if we are not on the leaf level
childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1];
}
return true;
}
/** Seeks the skip entry on the given level */
protected void seekChild(int level) throws IOException {
skipStream[level].seek(lastChildPointer);
numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
skipDoc[level] = lastDoc;
if (level > 0) {
childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1];
}
}
@Override
public void close() throws IOException {
for (int i = 1; i < skipStream.length; i++) {
if (skipStream[i] != null) {
skipStream[i].close();
}
}
}
/** Initializes the reader, for reuse on a new term. */
public void init(long skipPointer, int df) throws IOException {
this.skipPointer[0] = skipPointer;
this.docCount = df;
assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
: "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
Arrays.fill(skipDoc, 0);
Arrays.fill(numSkipped, 0);
Arrays.fill(childPointer, 0);
for (int i = 1; i < numberOfSkipLevels; i++) {
skipStream[i] = null;
}
loadSkipLevels();
}
/** Loads the skip levels */
private void loadSkipLevels() throws IOException {
if (docCount <= skipInterval[0]) {
numberOfSkipLevels = 1;
} else {
numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
}
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}
skipStream[0].seek(skipPointer[0]);
int toBuffer = numberOfLevelsToBuffer;
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
// the length of the current level
long length = readLevelLength(skipStream[0]);
// the start pointer of the current level
skipPointer[i] = skipStream[0].getFilePointer();
if (toBuffer > 0) {
// buffer this level
skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
toBuffer--;
} else {
// clone this stream, it is already at the start of the current level
skipStream[i] = skipStream[0].clone();
if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length));
}
// move base stream beyond the current level
skipStream[0].seek(skipStream[0].getFilePointer() + length);
}
}
// use base stream for the lowest level
skipPointer[0] = skipStream[0].getFilePointer();
}
/**
* Subclasses must implement the actual skip data encoding in this method.
*
* @param level the level skip data shall be read from
* @param skipStream the skip stream to read from
*/
protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
/**
* read the length of the current level written via {@link
* MultiLevelSkipListWriter#writeLevelLength(long, IndexOutput)}.
*
* @param skipStream the IndexInput the length shall be read from
* @return level length
*/
protected long readLevelLength(IndexInput skipStream) throws IOException {
return skipStream.readVLong();
}
/**
* read the child pointer written via {@link MultiLevelSkipListWriter#writeChildPointer(long,
* DataOutput)}.
*
* @param skipStream the IndexInput the child pointer shall be read from
* @return child pointer
*/
protected long readChildPointer(IndexInput skipStream) throws IOException {
return skipStream.readVLong();
}
/** Copies the values of the last read skip entry on this level */
protected void setLastSkipData(int level) {
lastDoc = skipDoc[level];
lastChildPointer = childPointer[level];
}
/** used to buffer the top skip levels */
private final static class SkipBuffer extends IndexInput {
private byte[] data;
private long pointer;
private int pos;
SkipBuffer(IndexInput input, int length) throws IOException {
super("SkipBuffer on " + input);
data = new byte[length];
pointer = input.getFilePointer();
input.readBytes(data, 0, length);
}
@Override
public void close() {
data = null;
}
@Override
public long getFilePointer() {
return pointer + pos;
}
@Override
public long length() {
return data.length;
}
@Override
public byte readByte() {
return data[pos++];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
System.arraycopy(data, pos, b, offset, len);
pos += len;
}
@Override
public void seek(long pos) {
this.pos = (int) (pos - pointer);
}
@Override
public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
throw new UnsupportedOperationException();
}
}
}