blob: a9d26866503517c72dec0f7ab39b866ccda6be42 [file] [log] [blame]
Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java (working copy)
@@ -42,6 +42,7 @@
private final int minTermBlockSize;
private final int maxTermBlockSize;
public final static int DEFAULT_BLOCK_SIZE = 128;
+ public final static int DEFAULT_SKIP_INTERVAL = 32;
public BlockPostingsFormat() {
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
@@ -62,8 +63,7 @@
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- // TODO: implement a new PostingsWriterBase to improve skip-settings
- PostingsWriterBase postingsWriter = new BlockPostingsWriter(state, 128);
+ PostingsWriterBase postingsWriter = new BlockPostingsWriter(state);
boolean success = false;
try {
@@ -86,8 +86,7 @@
state.fieldInfos,
state.segmentInfo,
state.context,
- state.segmentSuffix,
- 128);
+ state.segmentSuffix);
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(state.dir,
Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java (working copy)
@@ -24,14 +24,37 @@
import org.apache.lucene.store.IndexInput;
/**
- * Implements the skip list reader for the 4.0 posting list format
+ * Implements the skip list reader for block postings format
* that stores positions and payloads.
*
- * @see Lucene40PostingsFormat
- * @lucene.experimental
+ * Although this skipper uses MultiLevelSkipListReader as an interface,
+ * its definition of skip position will be a little different.
+ *
+ * For example, when df==2*skipInterval==6,
+ *
+ * 0 1 2 3 4 5
+ * d d d d d d (posting list)
+ * ^ ^ (skip point in MultiLevelSkipListWriter)
+ * ^ (skip point in BlockSkipWriter)
+ *
+ * In this case, MultiLevelSkipListReader will use the last document as a skip point,
+ * while BlockSkipReader should assume that no further skip point will come.
+ *
+ * If we use the interface directly in BlockSkipReader, it may naively try to read
+ * another skip entry after the only skip point has been loaded.
+ *
+ * To illustrate this, suppose we call skipTo(d[5]): since skip point d[3] has a smaller docId
+ * and numSkipped+skipInterval == df, the MultiLevelSkipListReader will assume the skip list
+ * isn't exhausted yet, and will try to load a non-existent skip point.
+ *
+ * Therefore, we trim df before passing it to the interface; see trim(int).
+ *
*/
final class BlockSkipReader extends MultiLevelSkipListReader {
private boolean DEBUG = BlockPostingsReader.DEBUG;
+ private int skipInterval;
+ private int blockSize;
+ private int skipsPerBlock;
private long docPointer[];
private long posPointer[];
@@ -47,8 +70,10 @@
private long lastDocPointer;
private int lastPosBufferUpto;
- public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
+ public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
super(skipStream, maxSkipLevels, skipInterval);
+ this.skipInterval = skipInterval;
+ this.blockSize = blockSize;
docPointer = new long[maxSkipLevels];
if (hasPos) {
posPointer = new long[maxSkipLevels];
@@ -73,8 +98,23 @@
}
}
+
+ /**
+ * Trim the original docFreq so that the skipReader reads the proper number of skip points.
+ *
+ * Since the definition of a skip point in BlockSkip* is a little different from MultiLevelSkip*,
+ * this trimmed docFreq will prevent the skipReader from:
+ * 1. naively reading a non-existent skip point after the last block boundary
+ * 2. moving into the vInt block
+ *
+ */
+ protected int trim(int df) {
+ int blockInts = df/blockSize*blockSize;
+ return df % blockSize == 0? df - 1: blockInts + skipInterval - 1;
+ }
+
public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) {
- super.init(skipPointer, df);
+ super.init(skipPointer, trim(df));
lastDocPointer = docBasePointer;
lastPosPointer = posBasePointer;
lastPayPointer = payBasePointer;
@@ -119,9 +159,6 @@
@Override
protected void seekChild(int level) throws IOException {
super.seekChild(level);
- if (DEBUG) {
- System.out.println("seekChild level=" + level);
- }
docPointer[level] = lastDocPointer;
if (posPointer != null) {
posPointer[level] = lastPosPointer;
@@ -142,16 +179,9 @@
protected void setLastSkipData(int level) {
super.setLastSkipData(level);
lastDocPointer = docPointer[level];
- if (DEBUG) {
- System.out.println("setLastSkipData level=" + level);
- System.out.println(" lastDocPointer=" + lastDocPointer);
- }
if (posPointer != null) {
lastPosPointer = posPointer[level];
lastPosBufferUpto = posBufferUpto[level];
- if (DEBUG) {
- System.out.println(" lastPosPointer=" + lastPosPointer + " lastPosBUfferUpto=" + lastPosBufferUpto);
- }
if (payPointer != null) {
lastPayPointer = payPointer[level];
}
@@ -166,27 +196,12 @@
@Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
- if (DEBUG) {
- System.out.println("readSkipData level=" + level);
- }
int delta = skipStream.readVInt();
- if (DEBUG) {
- System.out.println(" delta=" + delta);
- }
docPointer[level] += skipStream.readVInt();
- if (DEBUG) {
- System.out.println(" docFP=" + docPointer[level]);
- }
if (posPointer != null) {
posPointer[level] += skipStream.readVInt();
- if (DEBUG) {
- System.out.println(" posFP=" + posPointer[level]);
- }
posBufferUpto[level] = skipStream.readVInt();
- if (DEBUG) {
- System.out.println(" posBufferUpto=" + posBufferUpto[level]);
- }
if (payloadByteUpto != null) {
payloadByteUpto[level] = skipStream.readVInt();
Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (working copy)
@@ -55,10 +55,12 @@
// nocommit
final String segment;
- // NOTE: not private to avoid access$NNN methods:
- final int blockSize;
+ // nocommit: is it ok to set these two fixed?
+ final static int blockSize = BlockPostingsFormat.DEFAULT_BLOCK_SIZE;
+ final static int skipInterval = BlockPostingsFormat.DEFAULT_SKIP_INTERVAL;
+ final static int skipsPerBlock = blockSize/skipInterval;
- public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix, int blockSize) throws IOException {
+ public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
boolean success = false;
segment = segmentInfo.name;
IndexInput docIn = null;
@@ -100,7 +102,7 @@
}
}
- this.blockSize = blockSize;
+ assert blockSize % skipInterval == 0;
}
@Override
@@ -116,6 +118,24 @@
}
}
+ static void readVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
+ if (indexHasFreq) {
+ for(int i=0;i<num;i++) {
+ final int code = docIn.readVInt();
+ docBuffer[i] = code >>> 1;
+ if ((code & 1) != 0) {
+ freqBuffer[i] = 1;
+ } else {
+ freqBuffer[i] = docIn.readVInt();
+ }
+ }
+ } else {
+ for(int i=0;i<num;i++) {
+ docBuffer[i] = docIn.readVInt();
+ }
+ }
+ }
+
static void readBlock(IndexInput in, byte[] encoded, IntBuffer encodedBuffer, int[] buffer) throws IOException {
int header = in.readVInt();
in.readBytes(encoded, 0, ForUtil.getEncodedSize(header));
@@ -300,7 +320,8 @@
}
}
- final class BlockDocsEnum extends DocsEnum {
+
+ final public class BlockDocsEnum extends DocsEnum {
private final byte[] encoded;
private final IntBuffer encodedBuffer;
@@ -309,6 +330,11 @@
private int docBufferUpto;
+ // How many ints are encoded as ForBlock
+ private int blockInts;
+
+ private boolean loadNextBlock;
+
private BlockSkipReader skipper;
private boolean skipped;
@@ -343,7 +369,7 @@
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
- encoded = new byte[blockSize*4 + 4];
+ encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
}
@@ -356,9 +382,6 @@
public DocsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
this.liveDocs = liveDocs;
- if (DEBUG) {
- System.out.println(" FPR.reset: seg=" + segment + " termState=" + termState);
- }
docFreq = termState.docFreq;
docTermStartFP = termState.docStartFP;
docIn.seek(docTermStartFP);
@@ -371,6 +394,8 @@
accum = 0;
docUpto = 0;
docBufferUpto = blockSize;
+ blockInts = docFreq/blockSize*blockSize;
+ loadNextBlock = false;
skipped = false;
return this;
}
@@ -385,70 +410,45 @@
return doc;
}
+
+ /**
+ * Decode a slice of ints into the docDelta (and docTermFreq) buffer.
+ *
+ * When fully is set to false, we'll simply skip to the end of the current block
+ * after partial decoding is done.
+ */
private void refillDocs() throws IOException {
final int left = docFreq - docUpto;
assert left > 0;
- if (left >= blockSize) {
- if (DEBUG) {
- System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
- }
+ // We are still reading inside a block, otherwise we
+ // should consider to handle the tail after these blocks
+ if (docUpto < blockInts) {
+ //System.out.println("["+docFreq+"]"+" refillDoc");
readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer);
-
if (indexHasFreq) {
- if (DEBUG) {
- System.out.println(" fill freq block from fp=" + docIn.getFilePointer());
- }
readBlock(docIn, encoded, encodedBuffer, freqBuffer);
}
} else {
- // Read vInts:
- if (DEBUG) {
- System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer());
- }
- for(int i=0;i<left;i++) {
- final int code = docIn.readVInt();
- if (indexHasFreq) {
- docDeltaBuffer[i] = code >>> 1;
- if ((code & 1) != 0) {
- freqBuffer[i] = 1;
- } else {
- freqBuffer[i] = docIn.readVInt();
- }
- } else {
- docDeltaBuffer[i] = code;
- }
- }
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
}
- docBufferUpto = 0;
+ if (loadNextBlock) {
+ loadNextBlock = false;
+ } else {
+ docBufferUpto = 0;
+ }
}
@Override
public int nextDoc() throws IOException {
-
- if (DEBUG) {
- System.out.println("\nFPR.nextDoc");
- }
-
while (true) {
- if (DEBUG) {
- System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
- }
-
if (docUpto == docFreq) {
- if (DEBUG) {
- System.out.println(" return doc=END");
- }
return doc = NO_MORE_DOCS;
}
-
- if (docBufferUpto == blockSize) {
+ //System.out.println("["+docFreq+"]"+" nextDoc");
+ if (loadNextBlock || docBufferUpto == blockSize) {
refillDocs();
}
-
- if (DEBUG) {
- System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]);
- }
accum += docDeltaBuffer[docBufferUpto];
docUpto++;
@@ -456,20 +456,12 @@
doc = accum;
freq = freqBuffer[docBufferUpto];
docBufferUpto++;
- if (DEBUG) {
- System.out.println(" return doc=" + doc + " freq=" + freq);
- }
return doc;
}
-
- if (DEBUG) {
- System.out.println(" doc=" + accum + " is deleted; try next doc");
- }
-
docBufferUpto++;
}
}
-
+
@Override
public int advance(int target) throws IOException {
// nocommit make frq block load lazy/skippable
@@ -478,16 +470,18 @@
// nocommit put cheating back! does it help?
// nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) {
- if (docFreq > blockSize && target - accum > blockSize) {
- if (DEBUG) {
- System.out.println("load skipper");
- }
+ // We allow the skipper to run iff:
+ // 1. We have enough ints to use partial decode
+ // 2. The target is not within one skip
+ // 3. We won't skip into vInt block
+ if (docFreq > blockSize && target - accum > skipInterval && accum + skipInterval <= blockInts) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper = new BlockSkipReader((IndexInput) docIn.clone(),
BlockPostingsWriter.maxSkipLevels,
+ skipInterval,
blockSize,
indexHasPos,
indexHasOffsets,
@@ -498,6 +492,14 @@
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
+ //
+ // We don't always use the real docFreq, to prevent the skipper
+ // from
+ // 1. naively reading a non-existent skip point after the last block boundary
+ // 2. moving into the vInt block
+ //
+ // See BlockSkipReader for detailed explanation
+ //
skipper.init(docTermStartFP+skipOffset, docTermStartFP, 0, 0, docFreq);
skipped = true;
}
@@ -506,35 +508,23 @@
if (newDocUpto > docUpto) {
// Skipper moved
-
- if (DEBUG) {
- System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
- }
-
- assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto;
+ assert newDocUpto % skipInterval == (skipInterval-1): "got " + newDocUpto;
docUpto = newDocUpto+1;
- // Force block read next:
- docBufferUpto = blockSize;
- accum = skipper.getDoc();
- docIn.seek(skipper.getDocPointer());
+ // Force to read next slice of block
+ docBufferUpto = docUpto % blockSize;
+ loadNextBlock = true;
+ accum = skipper.getDoc(); // actually, this is just lastSkipEntry
+ docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
}
}
// Now scan:
while (nextDoc() != NO_MORE_DOCS) {
if (doc >= target) {
- if (DEBUG) {
- System.out.println(" advance return doc=" + doc);
- }
return doc;
}
}
-
- if (DEBUG) {
- System.out.println(" advance return doc=END");
- }
-
return NO_MORE_DOCS;
}
}
@@ -552,6 +542,9 @@
private int docBufferUpto;
private int posBufferUpto;
+ private int blockInts;
+ private boolean loadNextBlock;
+
private BlockSkipReader skipper;
private boolean skipped;
@@ -604,7 +597,7 @@
this.startDocIn = BlockPostingsReader.this.docIn;
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone();
- encoded = new byte[blockSize*4 + 4];
+ encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
@@ -618,9 +611,6 @@
public DocsAndPositionsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
this.liveDocs = liveDocs;
- if (DEBUG) {
- System.out.println(" FPR.reset: termState=" + termState);
- }
docFreq = termState.docFreq;
docTermStartFP = termState.docStartFP;
posTermStartFP = termState.posStartFP;
@@ -641,6 +631,8 @@
accum = 0;
docUpto = 0;
docBufferUpto = blockSize;
+ blockInts = docFreq/blockSize*blockSize;
+ loadNextBlock = false;
skipped = false;
return this;
}
@@ -658,45 +650,22 @@
private void refillDocs() throws IOException {
final int left = docFreq - docUpto;
assert left > 0;
-
- if (left >= blockSize) {
- if (DEBUG) {
- System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
- }
-
+ if (docUpto < blockInts) {
+ //System.out.println("["+docFreq+"]"+" refillDoc");
readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer);
-
- if (DEBUG) {
- System.out.println(" fill freq block from fp=" + docIn.getFilePointer());
- }
-
readBlock(docIn, encoded, encodedBuffer, freqBuffer);
} else {
- // Read vInts:
- if (DEBUG) {
- System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
- }
- for(int i=0;i<left;i++) {
- final int code = docIn.readVInt();
- docDeltaBuffer[i] = code >>> 1;
- if ((code & 1) != 0) {
- freqBuffer[i] = 1;
- } else {
- freqBuffer[i] = docIn.readVInt();
- }
- }
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
- docBufferUpto = 0;
+ if (loadNextBlock) {
+ loadNextBlock = false;
+ } else {
+ docBufferUpto = 0;
+ }
}
private void refillPositions() throws IOException {
- if (DEBUG) {
- System.out.println(" refillPositions");
- }
if (posIn.getFilePointer() == lastPosBlockFP) {
- if (DEBUG) {
- System.out.println(" vInt pos block @ fp=" + posIn.getFilePointer() + " hasPayloads=" + indexHasPayloads + " hasOffsets=" + indexHasOffsets);
- }
final int count = posIn.readVInt();
int payloadLength = 0;
for(int i=0;i<count;i++) {
@@ -712,43 +681,26 @@
} else {
posDeltaBuffer[i] = code;
}
-
if (indexHasOffsets) {
posIn.readVInt();
posIn.readVInt();
}
}
} else {
- if (DEBUG) {
- System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer());
- }
readBlock(posIn, encoded, encodedBuffer, posDeltaBuffer);
}
}
@Override
public int nextDoc() throws IOException {
-
- if (DEBUG) {
- System.out.println(" FPR.nextDoc");
- }
-
while (true) {
- if (DEBUG) {
- System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
- }
-
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
-
- if (docBufferUpto == blockSize) {
+ //System.out.println("["+docFreq+"]"+" nextDoc");
+ if (loadNextBlock || docBufferUpto == blockSize) {
refillDocs();
}
-
- if (DEBUG) {
- System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]);
- }
accum += docDeltaBuffer[docBufferUpto];
freq = freqBuffer[docBufferUpto];
posPendingCount += freq;
@@ -757,43 +709,27 @@
if (liveDocs == null || liveDocs.get(accum)) {
doc = accum;
- if (DEBUG) {
- System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
- }
position = 0;
return doc;
}
-
- if (DEBUG) {
- System.out.println(" doc=" + accum + " is deleted; try next doc");
- }
}
}
@Override
public int advance(int target) throws IOException {
// nocommit make frq block load lazy/skippable
- if (DEBUG) {
- System.out.println(" FPR.advance target=" + target);
- }
// nocommit 2 is heuristic guess!!
// nocommit put cheating back! does it help?
// nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) {
- if (docFreq > blockSize && target - accum > blockSize) {
+ if (docFreq > blockSize && target - accum > skipInterval && accum + skipInterval <= blockInts) {
- if (DEBUG) {
- System.out.println(" try skipper");
- }
-
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
- if (DEBUG) {
- System.out.println(" create skipper");
- }
skipper = new BlockSkipReader((IndexInput) docIn.clone(),
BlockPostingsWriter.maxSkipLevels,
+ skipInterval,
blockSize,
true,
indexHasOffsets,
@@ -804,9 +740,6 @@
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
- if (DEBUG) {
- System.out.println(" init skipper");
- }
skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
skipped = true;
}
@@ -815,16 +748,11 @@
if (newDocUpto > docUpto) {
// Skipper moved
-
- if (DEBUG) {
- System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
- }
-
- assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto;
+ assert newDocUpto % skipInterval == (skipInterval-1): "got " + newDocUpto;
docUpto = newDocUpto+1;
- // Force block read next:
- docBufferUpto = blockSize;
+ docBufferUpto = docUpto % blockSize;
+ loadNextBlock = true;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer();
@@ -835,16 +763,10 @@
// Now scan:
while (nextDoc() != NO_MORE_DOCS) {
if (doc >= target) {
- if (DEBUG) {
- System.out.println(" advance return doc=" + doc);
- }
return doc;
}
}
- if (DEBUG) {
- System.out.println(" advance return doc=END");
- }
return NO_MORE_DOCS;
}
@@ -856,31 +778,19 @@
private void skipPositions() throws IOException {
// Skip positions now:
int toSkip = posPendingCount - freq;
- if (DEBUG) {
- System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
- }
final int leftInBlock = blockSize - posBufferUpto;
if (toSkip < leftInBlock) {
posBufferUpto += toSkip;
- if (DEBUG) {
- System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto);
- }
} else {
toSkip -= leftInBlock;
while(toSkip >= blockSize) {
- if (DEBUG) {
- System.out.println(" skip whole block @ fp=" + posIn.getFilePointer());
- }
assert posIn.getFilePointer() != lastPosBlockFP;
skipBlock(posIn);
toSkip -= blockSize;
}
refillPositions();
posBufferUpto = toSkip;
- if (DEBUG) {
- System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto);
- }
}
position = 0;
@@ -888,13 +798,7 @@
@Override
public int nextPosition() throws IOException {
- if (DEBUG) {
- System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto);
- }
if (posPendingFP != -1) {
- if (DEBUG) {
- System.out.println(" seek to pendingFP=" + posPendingFP);
- }
posIn.seek(posPendingFP);
posPendingFP = -1;
@@ -913,9 +817,6 @@
}
position += posDeltaBuffer[posBufferUpto++];
posPendingCount--;
- if (DEBUG) {
- System.out.println(" return pos=" + position);
- }
return position;
}
@@ -965,6 +866,9 @@
private int docBufferUpto;
private int posBufferUpto;
+ private int blockInts;
+ private boolean loadNextBlock;
+
private BlockSkipReader skipper;
private boolean skipped;
@@ -1024,7 +928,7 @@
this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone();
this.payIn = (IndexInput) BlockPostingsReader.this.payIn.clone();
- encoded = new byte[blockSize*4 + 4];
+ encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (indexHasOffsets) {
@@ -1057,9 +961,6 @@
public EverythingEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
this.liveDocs = liveDocs;
- if (DEBUG) {
- System.out.println(" FPR.reset: termState=" + termState);
- }
docFreq = termState.docFreq;
docTermStartFP = termState.docStartFP;
posTermStartFP = termState.posStartFP;
@@ -1081,6 +982,8 @@
accum = 0;
docUpto = 0;
docBufferUpto = blockSize;
+ blockInts = docFreq/blockSize*blockSize;
+ loadNextBlock = false;
skipped = false;
return this;
}
@@ -1099,44 +1002,22 @@
final int left = docFreq - docUpto;
assert left > 0;
- if (left >= blockSize) {
- if (DEBUG) {
- System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
- }
-
+ if (docUpto < blockInts) {
+ //System.out.println("["+docFreq+"]"+" refillDoc");
readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer);
-
- if (DEBUG) {
- System.out.println(" fill freq block from fp=" + docIn.getFilePointer());
- }
-
readBlock(docIn, encoded, encodedBuffer, freqBuffer);
} else {
- // Read vInts:
- if (DEBUG) {
- System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer());
- }
- for(int i=0;i<left;i++) {
- final int code = docIn.readVInt();
- docDeltaBuffer[i] = code >>> 1;
- if ((code & 1) != 0) {
- freqBuffer[i] = 1;
- } else {
- freqBuffer[i] = docIn.readVInt();
- }
- }
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
}
- docBufferUpto = 0;
+ if (loadNextBlock) {
+ loadNextBlock = false;
+ } else {
+ docBufferUpto = 0;
+ }
}
private void refillPositions() throws IOException {
- if (DEBUG) {
- System.out.println(" refillPositions");
- }
if (posIn.getFilePointer() == lastPosBlockFP) {
- if (DEBUG) {
- System.out.println(" vInt pos block @ fp=" + posIn.getFilePointer() + " hasPayloads=" + indexHasPayloads + " hasOffsets=" + indexHasOffsets);
- }
final int count = posIn.readVInt();
int payloadLength = 0;
payloadByteUpto = 0;
@@ -1146,9 +1027,6 @@
if ((code & 1) != 0) {
payloadLength = posIn.readVInt();
}
- if (DEBUG) {
- System.out.println(" i=" + i + " payloadLen=" + payloadLength);
- }
payloadLengthBuffer[i] = payloadLength;
posDeltaBuffer[i] = code >>> 1;
if (payloadLength != 0) {
@@ -1164,32 +1042,17 @@
}
if (indexHasOffsets) {
- if (DEBUG) {
- System.out.println(" i=" + i + " read offsets from posIn.fp=" + posIn.getFilePointer());
- }
offsetStartDeltaBuffer[i] = posIn.readVInt();
offsetLengthBuffer[i] = posIn.readVInt();
- if (DEBUG) {
- System.out.println(" startOffDelta=" + offsetStartDeltaBuffer[i] + " offsetLen=" + offsetLengthBuffer[i]);
- }
}
}
payloadByteUpto = 0;
} else {
- if (DEBUG) {
- System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer());
- }
readBlock(posIn, encoded, encodedBuffer, posDeltaBuffer);
if (indexHasPayloads) {
- if (DEBUG) {
- System.out.println(" bulk payload block @ pay.fp=" + payIn.getFilePointer());
- }
readBlock(payIn, encoded, encodedBuffer, payloadLengthBuffer);
int numBytes = payIn.readVInt();
- if (DEBUG) {
- System.out.println(" " + numBytes + " payload bytes @ pay.fp=" + payIn.getFilePointer());
- }
if (numBytes > payloadBytes.length) {
payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
}
@@ -1198,9 +1061,6 @@
}
if (indexHasOffsets) {
- if (DEBUG) {
- System.out.println(" bulk offset block @ pay.fp=" + payIn.getFilePointer());
- }
readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaBuffer);
readBlock(payIn, encoded, encodedBuffer, offsetLengthBuffer);
}
@@ -1209,32 +1069,18 @@
@Override
public int nextDoc() throws IOException {
-
- if (DEBUG) {
- System.out.println(" FPR.nextDoc");
- }
-
if (indexHasPayloads) {
payloadByteUpto += payloadLength;
payloadLength = 0;
}
-
while (true) {
- if (DEBUG) {
- System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
- }
-
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
-
- if (docBufferUpto == blockSize) {
+ //System.out.println("["+docFreq+"]"+" nextDoc");
+ if (loadNextBlock || docBufferUpto == blockSize) {
refillDocs();
}
-
- if (DEBUG) {
- System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]);
- }
accum += docDeltaBuffer[docBufferUpto];
freq = freqBuffer[docBufferUpto];
posPendingCount += freq;
@@ -1243,45 +1089,30 @@
if (liveDocs == null || liveDocs.get(accum)) {
doc = accum;
- if (DEBUG) {
- System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
- }
position = 0;
payloadLength = 0;
lastStartOffset = 0;
return doc;
}
-
- if (DEBUG) {
- System.out.println(" doc=" + accum + " is deleted; try next doc");
- }
}
}
@Override
public int advance(int target) throws IOException {
// nocommit make frq block load lazy/skippable
- if (DEBUG) {
- System.out.println(" FPR.advance target=" + target);
- }
// nocommit 2 is heuristic guess!!
// nocommit put cheating back! does it help?
// nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) {
- if (docFreq > blockSize && target - accum > blockSize) {
+ if (docFreq > blockSize && target - accum > skipInterval && accum + skipInterval <= blockInts) {
- if (DEBUG) {
- System.out.println(" try skipper");
- }
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
- if (DEBUG) {
- System.out.println(" create skipper");
- }
skipper = new BlockSkipReader((IndexInput) docIn.clone(),
BlockPostingsWriter.maxSkipLevels,
+ skipInterval,
blockSize,
true,
indexHasOffsets,
@@ -1292,9 +1123,6 @@
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
- if (DEBUG) {
- System.out.println(" init skipper");
- }
skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
skipped = true;
}
@@ -1303,16 +1131,11 @@
if (newDocUpto > docUpto) {
// Skipper moved
-
- if (DEBUG) {
- System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
- }
-
- assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto;
+ assert newDocUpto % skipInterval == (skipInterval-1): "got " + newDocUpto;
docUpto = newDocUpto+1;
- // Force block read next:
- docBufferUpto = blockSize;
+ docBufferUpto = docUpto % blockSize;
+ loadNextBlock = true;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer();
@@ -1326,16 +1149,10 @@
// Now scan:
while (nextDoc() != NO_MORE_DOCS) {
if (doc >= target) {
- if (DEBUG) {
- System.out.println(" advance return doc=" + doc);
- }
return doc;
}
}
- if (DEBUG) {
- System.out.println(" advance return doc=END");
- }
return NO_MORE_DOCS;
}
@@ -1347,9 +1164,6 @@
private void skipPositions() throws IOException {
// Skip positions now:
int toSkip = posPendingCount - freq;
- if (DEBUG) {
- System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
- }
final int leftInBlock = blockSize - posBufferUpto;
if (toSkip < leftInBlock) {
@@ -1363,15 +1177,9 @@
}
posBufferUpto++;
}
- if (DEBUG) {
- System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto);
- }
} else {
toSkip -= leftInBlock;
while(toSkip >= blockSize) {
- if (DEBUG) {
- System.out.println(" skip whole block @ fp=" + posIn.getFilePointer());
- }
assert posIn.getFilePointer() != lastPosBlockFP;
skipBlock(posIn);
@@ -1407,9 +1215,6 @@
}
posBufferUpto++;
}
- if (DEBUG) {
- System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto);
- }
}
position = 0;
@@ -1421,20 +1226,11 @@
@Override
public int nextPosition() throws IOException {
- if (DEBUG) {
- System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto + " payloadByteUpto=" + payloadByteUpto);
- }
if (posPendingFP != -1) {
- if (DEBUG) {
- System.out.println(" seek pos to pendingFP=" + posPendingFP);
- }
posIn.seek(posPendingFP);
posPendingFP = -1;
if (payPendingFP != -1) {
- if (DEBUG) {
- System.out.println(" seek pay to pendingFP=" + payPendingFP);
- }
payIn.seek(payPendingFP);
payPendingFP = -1;
}
@@ -1444,11 +1240,6 @@
}
if (indexHasPayloads) {
- if (DEBUG) {
- if (payloadLength != 0) {
- System.out.println(" skip unread payload length=" + payloadLength);
- }
- }
payloadByteUpto += payloadLength;
payloadLength = 0;
}
@@ -1476,9 +1267,6 @@
posBufferUpto++;
posPendingCount--;
- if (DEBUG) {
- System.out.println(" return pos=" + position);
- }
return position;
}
@@ -1499,9 +1287,6 @@
@Override
public BytesRef getPayload() {
- if (DEBUG) {
- System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto);
- }
payload.bytes = payloadBytes;
payload.offset = payloadByteUpto;
payload.length = payloadLength;
Index: lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py (working copy)
@@ -75,18 +75,18 @@
w("final class PackedIntsDecompress {\n")
w('\n // nocommit: assess perf of this to see if specializing is really needed\n')
- w('\n // NOTE: hardwired to blockSize == 128\n\n')
+ w('\n // NOTE: hardwired to blockSize == 32*k \n\n')
- w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n')
- w(' Arrays.fill(output, compressedBuffer.get());\n')
+ w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output, int iteration) {\n')
+ w(' Arrays.fill(output, 0, iteration*32, compressedBuffer.get());\n')
w(' }\n')
for numFrameBits in xrange(1, 33):
- w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
+ w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output, int iteration) {\n' % numFrameBits)
w(' final int numFrameBits = %d;\n' % numFrameBits)
w(' final int mask = (int) ((1L<<numFrameBits) - 1);\n')
w(' int outputOffset = 0;\n')
- w(' for(int step=0;step<4;step++) {\n')
+ w(' for(int step=0;step<iteration;step++) {\n')
for i in range(numFrameBits): # declare int vars and init from buffer
w(" int intValue" + str(i) + " = compressedBuffer.get();\n")
Index: lucene/core/src/java/org/apache/lucene/codecs/block/PackedIntsDecompress.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/PackedIntsDecompress.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/PackedIntsDecompress.java (working copy)
@@ -25,16 +25,16 @@
// nocommit: assess perf of this to see if specializing is really needed
- // NOTE: hardwired to blockSize == 128
+ // NOTE: hardwired to blockSize == 32*k
- public static void decode0(final IntBuffer compressedBuffer, final int[] output) {
- Arrays.fill(output, compressedBuffer.get());
+ public static void decode0(final IntBuffer compressedBuffer, final int[] output, int iteration) {
+ Arrays.fill(output, 0, iteration*32, compressedBuffer.get());
}
- public static void decode1(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode1(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 1;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
output[0 + outputOffset] = intValue0 & mask;
output[1 + outputOffset] = (intValue0 >>> 1) & mask;
@@ -71,11 +71,11 @@
outputOffset += 32;
}
}
- public static void decode2(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode2(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 2;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
output[0 + outputOffset] = intValue0 & mask;
@@ -113,11 +113,11 @@
outputOffset += 32;
}
}
- public static void decode3(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode3(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 3;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -156,11 +156,11 @@
outputOffset += 32;
}
}
- public static void decode4(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode4(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 4;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -200,11 +200,11 @@
outputOffset += 32;
}
}
- public static void decode5(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode5(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 5;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -245,11 +245,11 @@
outputOffset += 32;
}
}
- public static void decode6(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode6(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 6;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -291,11 +291,11 @@
outputOffset += 32;
}
}
- public static void decode7(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode7(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 7;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -338,11 +338,11 @@
outputOffset += 32;
}
}
- public static void decode8(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode8(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 8;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -386,11 +386,11 @@
outputOffset += 32;
}
}
- public static void decode9(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode9(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 9;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -435,11 +435,11 @@
outputOffset += 32;
}
}
- public static void decode10(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode10(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 10;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -485,11 +485,11 @@
outputOffset += 32;
}
}
- public static void decode11(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode11(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 11;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -536,11 +536,11 @@
outputOffset += 32;
}
}
- public static void decode12(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode12(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 12;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -588,11 +588,11 @@
outputOffset += 32;
}
}
- public static void decode13(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode13(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 13;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -641,11 +641,11 @@
outputOffset += 32;
}
}
- public static void decode14(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode14(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 14;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -695,11 +695,11 @@
outputOffset += 32;
}
}
- public static void decode15(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode15(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 15;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -750,11 +750,11 @@
outputOffset += 32;
}
}
- public static void decode16(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode16(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 16;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -806,11 +806,11 @@
outputOffset += 32;
}
}
- public static void decode17(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode17(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 17;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -863,11 +863,11 @@
outputOffset += 32;
}
}
- public static void decode18(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode18(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 18;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -921,11 +921,11 @@
outputOffset += 32;
}
}
- public static void decode19(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode19(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 19;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -980,11 +980,11 @@
outputOffset += 32;
}
}
- public static void decode20(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode20(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 20;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1040,11 +1040,11 @@
outputOffset += 32;
}
}
- public static void decode21(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode21(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 21;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1101,11 +1101,11 @@
outputOffset += 32;
}
}
- public static void decode22(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode22(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 22;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1163,11 +1163,11 @@
outputOffset += 32;
}
}
- public static void decode23(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode23(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 23;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1226,11 +1226,11 @@
outputOffset += 32;
}
}
- public static void decode24(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode24(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 24;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1290,11 +1290,11 @@
outputOffset += 32;
}
}
- public static void decode25(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode25(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 25;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1355,11 +1355,11 @@
outputOffset += 32;
}
}
- public static void decode26(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode26(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 26;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1421,11 +1421,11 @@
outputOffset += 32;
}
}
- public static void decode27(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode27(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 27;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1488,11 +1488,11 @@
outputOffset += 32;
}
}
- public static void decode28(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode28(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 28;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1556,11 +1556,11 @@
outputOffset += 32;
}
}
- public static void decode29(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode29(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 29;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1625,11 +1625,11 @@
outputOffset += 32;
}
}
- public static void decode30(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode30(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 30;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1695,11 +1695,11 @@
outputOffset += 32;
}
}
- public static void decode31(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode31(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 31;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
@@ -1766,11 +1766,11 @@
outputOffset += 32;
}
}
- public static void decode32(final IntBuffer compressedBuffer, final int[] output) {
+ public static void decode32(final IntBuffer compressedBuffer, final int[] output, int iteration) {
final int numFrameBits = 32;
final int mask = (int) ((1L<<numFrameBits) - 1);
int outputOffset = 0;
- for(int step=0;step<4;step++) {
+ for(int step=0;step<iteration;step++) {
int intValue0 = compressedBuffer.get();
int intValue1 = compressedBuffer.get();
int intValue2 = compressedBuffer.get();
Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (working copy)
@@ -60,9 +60,11 @@
final IndexOutput posOut;
final IndexOutput payOut;
- static final int DEFAULT_BLOCK_SIZE = 128;
+ final static int blockSize = BlockPostingsFormat.DEFAULT_BLOCK_SIZE;
+ final static int skipInterval = BlockPostingsFormat.DEFAULT_SKIP_INTERVAL;
- final int blockSize;
+ // how many slices(or intervals) one block holds
+ final static int skipsPerBlock = blockSize/skipInterval;
private IndexOutput termsOut;
@@ -90,13 +92,13 @@
private byte[] payloadBytes;
private int payloadByteUpto;
- private int lastBlockDocID;
- private boolean saveNextPosBlock;
- private long lastBlockPosFP;
- private long lastBlockPayFP;
- private int lastBlockPosBufferUpto;
- private int lastBlockStartOffset;
- private int lastBlockPayloadByteUpto;
+ final int[] lastBlockDocIDs;
+ private long[] lastBlockPosFPs;
+ private long[] lastBlockPayFPs;
+ private int[] lastBlockPosBufferUptos;
+ private int[] lastBlockStartOffsets;
+ private int[] lastBlockPayloadByteUptos;
+
private int lastDocID;
private int lastPosition;
private int lastStartOffset;
@@ -107,9 +109,8 @@
private final BlockSkipWriter skipWriter;
- public BlockPostingsWriter(SegmentWriteState state, int blockSize) throws IOException {
+ public BlockPostingsWriter(SegmentWriteState state) throws IOException {
super();
- this.blockSize = blockSize;
docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.DOC_EXTENSION),
state.context);
@@ -164,14 +165,22 @@
docDeltaBuffer = new int[blockSize];
freqBuffer = new int[blockSize];
- skipWriter = new BlockSkipWriter(blockSize,
- maxSkipLevels,
+ lastBlockDocIDs = new int[skipsPerBlock];
+ lastBlockPosFPs = new long[skipsPerBlock];
+ lastBlockPayFPs = new long[skipsPerBlock];
+ lastBlockPosBufferUptos = new int[skipsPerBlock];
+ lastBlockStartOffsets = new int[skipsPerBlock];
+ lastBlockPayloadByteUptos = new int[skipsPerBlock];
+
+ skipWriter = new BlockSkipWriter(maxSkipLevels,
+ skipInterval,
+ blockSize,
state.segmentInfo.getDocCount(),
docOut,
posOut,
payOut);
- encoded = new byte[blockSize*4 + 4];
+ encoded = new byte[blockSize*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
}
@@ -201,7 +210,7 @@
payTermStartFP = payOut.getFilePointer();
}
}
- lastBlockDocID = -1;
+ lastBlockDocIDs[0] = -1;
lastDocID = 0;
if (DEBUG) {
System.out.println("FPW.startTerm startFP=" + docTermStartFP);
@@ -211,7 +220,6 @@
private void writeBlock(int[] buffer, IndexOutput out) throws IOException {
final int header = ForUtil.compress(buffer, encodedBuffer);
- //System.out.println(" block has " + numBytes + " bytes");
out.writeVInt(header);
out.writeBytes(encoded, ForUtil.getEncodedSize(header));
}
@@ -219,74 +227,24 @@
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (DEBUG) {
- System.out.println("FPW.startDoc docID=" + docID);
+ System.out.println("FPW.startDoc docID["+docBufferUpto+"]=" + docID);
}
+ final int docDelta = docID - lastDocID;
- // nocommit do this in finishDoc... but does it fail...?
- // is it not always called...?
- if (posOut != null && saveNextPosBlock) {
- lastBlockPosFP = posOut.getFilePointer();
- if (payOut != null) {
- lastBlockPayFP = payOut.getFilePointer();
- }
- lastBlockPosBufferUpto = posBufferUpto;
- lastBlockStartOffset = lastStartOffset;
- lastBlockPayloadByteUpto = payloadByteUpto;
- saveNextPosBlock = false;
- if (DEBUG) {
- System.out.println(" now save lastBlockPosFP=" + lastBlockPosFP + " lastBlockPosBufferUpto=" + lastBlockPosBufferUpto + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUpto);
- }
- }
-
- final int docDelta = docID - lastDocID;
if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
}
- lastDocID = docID;
docDeltaBuffer[docBufferUpto] = docDelta;
- if (DEBUG) {
- System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta);
- }
+// if (DEBUG) {
+// System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta);
+// }
if (fieldHasFreqs) {
freqBuffer[docBufferUpto] = termDocFreq;
}
-
docBufferUpto++;
docCount++;
-
- if (docBufferUpto == blockSize) {
- // nocommit maybe instead of buffering skip before
- // writing a block based on last block's end data
- // ... we could buffer after writing the block? only
- // iffiness with that approach is it could be a
- // pointlness skip? like we may stop adding docs
- // right after that, then we have skip point AFTER
- // last doc. the thing is, in finishTerm we are
- // already sometimes adding a skip point AFTER the
- // last doc?
- if (lastBlockDocID != -1) {
- if (DEBUG) {
- System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-blockSize));
- }
- skipWriter.bufferSkip(lastBlockDocID, docCount-blockSize, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
- }
- lastBlockDocID = docID;
- saveNextPosBlock = true;
-
- if (DEBUG) {
- System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer());
- }
- writeBlock(docDeltaBuffer, docOut);
- if (fieldHasFreqs) {
- if (DEBUG) {
- System.out.println(" write freq block @ fp=" + docOut.getFilePointer());
- }
- writeBlock(freqBuffer, docOut);
- }
- docBufferUpto = 0;
- }
-
+ lastDocID = docID;
lastPosition = 0;
lastStartOffset = 0;
}
@@ -343,7 +301,46 @@
}
@Override
- public void finishDoc() {
+ public void finishDoc() throws IOException {
+ // We have collected a full block of docs, so write the skip data as well as
+ // the postings list for the previous block
+ if (docBufferUpto == blockSize) {
+ skipWriter.bufferSkip(lastBlockDocIDs, docCount-blockSize, lastBlockPosFPs, lastBlockPayFPs, lastBlockPosBufferUptos, lastBlockStartOffsets, lastBlockPayloadByteUptos);
+ if (DEBUG) {
+ System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocIDs[0] + " docCount=" + (docCount-blockSize));
+ }
+ if (DEBUG) {
+ System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer());
+ }
+ writeBlock(docDeltaBuffer, docOut);
+ if (fieldHasFreqs) {
+ if (DEBUG) {
+ System.out.println(" write freq block @ fp=" + docOut.getFilePointer());
+ }
+ writeBlock(freqBuffer, docOut);
+ }
+ docBufferUpto = 0;
+ }
+
+ // Since we don't know the df for the current term, we have to buffer
+ // the skip data for each block, and once a block of docs has been
+ // collected, write it to the skip file.
+ int slice = (docBufferUpto)/skipInterval;
+ if (docBufferUpto % skipInterval == 0) {
+ lastBlockDocIDs[slice] = lastDocID;
+ if (posOut != null) {
+ if (payOut != null) {
+ lastBlockPayFPs[slice] = payOut.getFilePointer();
+ }
+ lastBlockPosFPs[slice] = posOut.getFilePointer();
+ lastBlockPosBufferUptos[slice] = posBufferUpto;
+ lastBlockStartOffsets[slice] = lastStartOffset;
+ lastBlockPayloadByteUptos[slice] = payloadByteUpto;
+ }
+ if (DEBUG) {
+ System.out.println(" docBufferUpto="+docBufferUpto+" now get lastBlockDocID="+lastBlockDocIDs[slice]+" lastBlockPosFP=" + lastBlockPosFPs[slice] + " lastBlockPosBufferUpto=" + lastBlockPosBufferUptos[slice] + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUptos[slice]);
+ }
+ }
}
private static class PendingTerm {
@@ -367,7 +364,6 @@
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
-
assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
@@ -378,17 +374,14 @@
System.out.println("FPW.finishTerm docFreq=" + stats.docFreq);
}
- // nocommit silly that skipper must write skip when we no
- // postings come after it, but if we don't do this, skip
- // reader incorrectly thinks it can read another level 0
- // skip entry here!:
- //if (docCount > blockSize && docBufferUpto > 0) {
- if (docCount > blockSize) {
- final int lastDocCount = blockSize*(docCount/blockSize);
+ // If a vInt block follows, we won't skip into it,
+ // but we should be able to skip to its start point
+ final int lastDocCount = blockSize*(docCount/blockSize);
+ if (docCount > blockSize && docCount != lastDocCount) {
if (DEBUG) {
- System.out.println(" bufferSkip at finishTerm: lastDocID=" + lastBlockDocID + " docCount=" + lastDocCount);
+ System.out.println(" bufferSkip at finishTerm: lastDocID=" + lastBlockDocIDs[0] + " docCount=" + lastDocCount);
}
- skipWriter.bufferSkip(lastBlockDocID, lastDocCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto);
+ skipWriter.bufferSkip(lastBlockDocIDs[0], lastDocCount, lastBlockPosFPs[0], lastBlockPayFPs[0], lastBlockPosBufferUptos[0], lastBlockStartOffsets[0], lastBlockPayloadByteUptos[0]);
}
if (DEBUG) {
Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (working copy)
@@ -23,12 +23,34 @@
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
-// nocommit do we need more frequent skips at level > 0?
-// 128*128 is immense? may need to decouple
+// nocommit may need to decouple
// baseSkipInterval & theRestSkipInterval?
+/**
+ * Writes skip lists with multiple levels, and supports skipping within a block of ints.
+ *
+ * Assume that docFreq = 28, blockSize=12, and skipInterval=3
+ *
+ * | block#0 | | block#1 | |vInts|
+ * d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
+ * ^ ^ ^ ^ ^ ^ ^ ^ (level 0 skip point)
+ *
+ * Note that skipWriter will ignore the first document in block#0, since
+ * it is useless as a skip point. Also, we'll never skip into the vInts
+ * block; we only record skip data at its start point (if it exists).
+ *
+ * For each skip point, we will record:
+ * 1. lastDocID,
+ * 2. its related file pointers (position, payload),
+ * 3. related numbers or uptos (position, payload),
+ * 4. start offset.
+ *
+ */
final class BlockSkipWriter extends MultiLevelSkipListWriter {
private boolean DEBUG = BlockPostingsReader.DEBUG;
+ private int skipInterval;
+ private int blockSize;
+ private int skipsPerBlock;
private int[] lastSkipDoc;
private long[] lastSkipDocPointer;
@@ -52,11 +74,16 @@
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
- public BlockSkipWriter(int skipInterval, int maxSkipLevels, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
+ public BlockSkipWriter(int maxSkipLevels, int skipInterval, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
super(skipInterval, maxSkipLevels, docCount);
this.docOut = docOut;
this.posOut = posOut;
this.payOut = payOut;
+ this.skipInterval = skipInterval;
+ this.blockSize = blockSize;
+ this.skipsPerBlock = blockSize/skipInterval;
+ assert this.skipsPerBlock > 0 : this.skipsPerBlock;
+ assert blockSize % skipInterval == 0 : blockSize+" "+skipInterval;
lastSkipDoc = new int[maxSkipLevels];
lastSkipDocPointer = new long[maxSkipLevels];
@@ -108,6 +135,23 @@
this.curStartOffset = startOffset;
bufferSkip(numDocs);
}
+
+ /**
+ * Sets the values for skip data in current block
+ */
+ public void bufferSkip(int[] docs, int numDocs, long[] posFPs, long[] payFPs, int[] posBufferUptos, int[] startOffsets, int[] payloadByteUptos) throws IOException {
+ this.curDocPointer = docOut.getFilePointer();
+ // first doc is useless as skip point
+ for (int i=(numDocs==0 ? 1:0); i<skipsPerBlock; i++) {
+ this.curPosPointer = posFPs[i];
+ this.curPayPointer = payFPs[i];
+ this.curPosBufferUpto = posBufferUptos[i];
+ this.curPayloadByteUpto = payloadByteUptos[i];
+ this.curStartOffset = startOffsets[i];
+ this.curDoc = docs[i];
+ bufferSkip(numDocs+i*skipInterval);
+ }
+ }
@Override
protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
Index: lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java (working copy)
@@ -22,7 +22,7 @@
* Encode all values in normal area with fixed bit width,
* which is determined by the max value in this block.
*/
-public class ForUtil {
+public final class ForUtil {
protected static final int[] MASK = { 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
@@ -30,6 +30,8 @@
0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
0x7fffffff, 0xffffffff};
+ protected static final int blockSize = BlockPostingsFormat.DEFAULT_BLOCK_SIZE;
+ protected static final int totalIterations = blockSize/32;
/** Compress given int[] into Integer buffer, with For format
*
@@ -76,53 +78,73 @@
int numBits = ((header >> 8) & MASK[6]);
- decompressCore(intBuffer, data, numBits);
+ decompressFull(intBuffer, data, numBits);
}
+ /**
+ * Decompress the leading part of a block from the given Integer buffer
+ * into an int array. The buffer is rewound before decoding.
+ *
+ * @param intBuffer integer buffer to hold compressed data
+ * @param data int array to hold uncompressed data
+ * @param header header for current block, as produced by {@link #getHeader}
+ * @param iteration number of 32-int groups to decode; this method will
+ *        decode iteration*32 ints from compressed data.
+ */
+ public static void decompress(IntBuffer intBuffer, int[] data, int header, int iteration) {
+ // since this buffer is reused at upper level, rewind first
+ intBuffer.rewind();
+
+ // The header keeps numBits in its low 6 bits. Use the accessor rather than
+ // hand-decoding with (header >> 8), which reads bits that belong to the
+ // encoded size and therefore picks the wrong decoder.
+ // NOTE(review): any other caller that still extracts numBits with
+ // (header >> 8) needs the same change.
+ int numBits = getNumBits(header);
+
+ decompressPart(intBuffer, data, numBits, iteration);
+ }
+
+
+
/**
* IntBuffer will not be rewinded in this method, therefore
* caller should ensure that the position is set to the first
* encoded int before decoding.
+ * <p>
+ * Decodes a whole block: after checking that numBits is within 0..32 it
+ * delegates to {@code decompressPart} with {@code totalIterations}.
*/
- static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) {
- assert numBits<=32;
+ static void decompressFull(IntBuffer intBuffer, int[] data, int numBits) {
+ assert numBits<=32 : numBits;
assert numBits>=0;
+ decompressPart(intBuffer, data, numBits, totalIterations);
+ }
- // TODO: PackedIntsDecompress is hardewired to size==128 only
+ /**
+ * Dispatches to the {@code PackedIntsDecompress} decoder that matches the
+ * given bit width, passing {@code iteration} through to it.
+ * The IntBuffer is not rewound here, so the caller must position it first.
+ *
+ * @param intBuffer integer buffer holding the compressed data
+ * @param data int array to receive the uncompressed data
+ * @param numBits bit width of the encoded values, expected in 0..32
+ * @param iteration number of 32-int groups to decode (presumably
+ *        iteration*32 ints are written to {@code data} - confirm against
+ *        PackedIntsDecompress)
+ */
+ static void decompressPart(IntBuffer intBuffer, int[] data, int numBits, int iteration) {
+ // Validate here too: this method can be reached without going through
+ // decompressFull's asserts, and the switch below has no default branch,
+ // so an unsupported width would otherwise silently decode nothing.
+ assert numBits >= 0 && numBits <= 32 : numBits;
switch(numBits) {
- case 0: PackedIntsDecompress.decode0(intBuffer, data); break;
- case 1: PackedIntsDecompress.decode1(intBuffer, data); break;
- case 2: PackedIntsDecompress.decode2(intBuffer, data); break;
- case 3: PackedIntsDecompress.decode3(intBuffer, data); break;
- case 4: PackedIntsDecompress.decode4(intBuffer, data); break;
- case 5: PackedIntsDecompress.decode5(intBuffer, data); break;
- case 6: PackedIntsDecompress.decode6(intBuffer, data); break;
- case 7: PackedIntsDecompress.decode7(intBuffer, data); break;
- case 8: PackedIntsDecompress.decode8(intBuffer, data); break;
- case 9: PackedIntsDecompress.decode9(intBuffer, data); break;
- case 10: PackedIntsDecompress.decode10(intBuffer, data); break;
- case 11: PackedIntsDecompress.decode11(intBuffer, data); break;
- case 12: PackedIntsDecompress.decode12(intBuffer, data); break;
- case 13: PackedIntsDecompress.decode13(intBuffer, data); break;
- case 14: PackedIntsDecompress.decode14(intBuffer, data); break;
- case 15: PackedIntsDecompress.decode15(intBuffer, data); break;
- case 16: PackedIntsDecompress.decode16(intBuffer, data); break;
- case 17: PackedIntsDecompress.decode17(intBuffer, data); break;
- case 18: PackedIntsDecompress.decode18(intBuffer, data); break;
- case 19: PackedIntsDecompress.decode19(intBuffer, data); break;
- case 20: PackedIntsDecompress.decode20(intBuffer, data); break;
- case 21: PackedIntsDecompress.decode21(intBuffer, data); break;
- case 22: PackedIntsDecompress.decode22(intBuffer, data); break;
- case 23: PackedIntsDecompress.decode23(intBuffer, data); break;
- case 24: PackedIntsDecompress.decode24(intBuffer, data); break;
- case 25: PackedIntsDecompress.decode25(intBuffer, data); break;
- case 26: PackedIntsDecompress.decode26(intBuffer, data); break;
- case 27: PackedIntsDecompress.decode27(intBuffer, data); break;
- case 28: PackedIntsDecompress.decode28(intBuffer, data); break;
- case 29: PackedIntsDecompress.decode29(intBuffer, data); break;
- case 30: PackedIntsDecompress.decode30(intBuffer, data); break;
- case 31: PackedIntsDecompress.decode31(intBuffer, data); break;
- case 32: PackedIntsDecompress.decode32(intBuffer, data); break;
+ case 0: PackedIntsDecompress.decode0(intBuffer, data, iteration); break;
+ case 1: PackedIntsDecompress.decode1(intBuffer, data, iteration); break;
+ case 2: PackedIntsDecompress.decode2(intBuffer, data, iteration); break;
+ case 3: PackedIntsDecompress.decode3(intBuffer, data, iteration); break;
+ case 4: PackedIntsDecompress.decode4(intBuffer, data, iteration); break;
+ case 5: PackedIntsDecompress.decode5(intBuffer, data, iteration); break;
+ case 6: PackedIntsDecompress.decode6(intBuffer, data, iteration); break;
+ case 7: PackedIntsDecompress.decode7(intBuffer, data, iteration); break;
+ case 8: PackedIntsDecompress.decode8(intBuffer, data, iteration); break;
+ case 9: PackedIntsDecompress.decode9(intBuffer, data, iteration); break;
+ case 10: PackedIntsDecompress.decode10(intBuffer, data, iteration); break;
+ case 11: PackedIntsDecompress.decode11(intBuffer, data, iteration); break;
+ case 12: PackedIntsDecompress.decode12(intBuffer, data, iteration); break;
+ case 13: PackedIntsDecompress.decode13(intBuffer, data, iteration); break;
+ case 14: PackedIntsDecompress.decode14(intBuffer, data, iteration); break;
+ case 15: PackedIntsDecompress.decode15(intBuffer, data, iteration); break;
+ case 16: PackedIntsDecompress.decode16(intBuffer, data, iteration); break;
+ case 17: PackedIntsDecompress.decode17(intBuffer, data, iteration); break;
+ case 18: PackedIntsDecompress.decode18(intBuffer, data, iteration); break;
+ case 19: PackedIntsDecompress.decode19(intBuffer, data, iteration); break;
+ case 20: PackedIntsDecompress.decode20(intBuffer, data, iteration); break;
+ case 21: PackedIntsDecompress.decode21(intBuffer, data, iteration); break;
+ case 22: PackedIntsDecompress.decode22(intBuffer, data, iteration); break;
+ case 23: PackedIntsDecompress.decode23(intBuffer, data, iteration); break;
+ case 24: PackedIntsDecompress.decode24(intBuffer, data, iteration); break;
+ case 25: PackedIntsDecompress.decode25(intBuffer, data, iteration); break;
+ case 26: PackedIntsDecompress.decode26(intBuffer, data, iteration); break;
+ case 27: PackedIntsDecompress.decode27(intBuffer, data, iteration); break;
+ case 28: PackedIntsDecompress.decode28(intBuffer, data, iteration); break;
+ case 29: PackedIntsDecompress.decode29(intBuffer, data, iteration); break;
+ case 30: PackedIntsDecompress.decode30(intBuffer, data, iteration); break;
+ case 31: PackedIntsDecompress.decode31(intBuffer, data, iteration); break;
+ case 32: PackedIntsDecompress.decode32(intBuffer, data, iteration); break;
}
}
@@ -177,23 +199,22 @@
/**
* Generate the 4 byte header, which contains (from lsb to msb):
*
- * 8 bits for encoded block int size (excluded header, this limits DEFAULT_BLOCK_SIZE <= 2^8)
* 6 bits for num of frame bits (when 0, values in this block are all the same)
- * other bits unused
+ * other bits for encoded block int size (excluded header)
*
+ * @param encodedSize encoded block int size, stored in the bits above the low 6
+ * @param numBits num of frame bits, stored in the low 6 bits; it is not masked
+ *        here, so a value wider than 6 bits would overlap the size bits
+ * @return the packed header
*/
static int getHeader(int encodedSize, int numBits) {
- return (encodedSize)
- | ((numBits) << 8);
+ return (numBits)
+ | ((encodedSize) << 6);
}
/**
* Expert: get metadata from header.
+ * <p>
+ * {@code getNumBits} returns the low 6 bits of the header;
+ * {@code getEncodedSize} returns the remaining upper bits multiplied by 4
+ * (presumably converting a count of ints to bytes - TODO confirm).
*/
+ public static int getNumBits(int header) {
+ return ((header & MASK[6]));
+ }
public static int getEncodedSize(int header) {
- return ((header & MASK[8]))*4;
+ return ((header >>> 6))*4;
}
- public static int getNumBits(int header) {
- return ((header >> 8) & MASK[6]);
- }
}
Index: lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java (revision 1369254)
+++ lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java (working copy)
@@ -131,6 +131,7 @@
setLastSkipData(level);
numSkipped[level] += skipInterval[level];
+ //System.out.println("["+docCount+"]"+" skipped "+numSkipped[level]);
if (numSkipped[level] > docCount) {
// this skip list is exhausted