| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java (working copy) |
| @@ -42,6 +42,7 @@ |
| private final int minTermBlockSize; |
| private final int maxTermBlockSize; |
| public final static int DEFAULT_BLOCK_SIZE = 128; |
| + public final static int DEFAULT_SKIP_INTERVAL = 32; |
| |
| public BlockPostingsFormat() { |
| this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); |
| @@ -62,8 +63,7 @@ |
| |
| @Override |
| public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| - // TODO: implement a new PostingsWriterBase to improve skip-settings |
| - PostingsWriterBase postingsWriter = new BlockPostingsWriter(state, 128); |
| + PostingsWriterBase postingsWriter = new BlockPostingsWriter(state); |
| |
| boolean success = false; |
| try { |
| @@ -86,8 +86,7 @@ |
| state.fieldInfos, |
| state.segmentInfo, |
| state.context, |
| - state.segmentSuffix, |
| - 128); |
| + state.segmentSuffix); |
| boolean success = false; |
| try { |
| FieldsProducer ret = new BlockTreeTermsReader(state.dir, |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java (working copy) |
| @@ -24,14 +24,37 @@ |
| import org.apache.lucene.store.IndexInput; |
| |
| /** |
| - * Implements the skip list reader for the 4.0 posting list format |
| + * Implements the skip list reader for the block postings format |
| * that stores positions and payloads. |
| * |
| - * @see Lucene40PostingsFormat |
| - * @lucene.experimental |
| + * Although this skipper uses MultiLevelSkipListReader as an interface, |
| + * its definition of skip position will be a little different. |
| + * |
| + * For example, when df==2*skipInterval==6, |
| + * |
| + * 0 1 2 3 4 5 |
| + * d d d d d d (posting list) |
| + * ^ ^ (skip point in MultiLevelSkipListWriter) |
| + * ^ (skip point in BlockSkipWriter) |
| + * |
| + * In this case, MultiLevelSkipListReader will use the last document as a skip point, |
| + * while BlockSkipReader should assume that no further skip point will come. |
| + * |
| + * If we use the interface directly in BlockSkipReader, it may naively try to read |
| + * another skip entry after the only skip point has been loaded. |
| + * |
| + * To illustrate this, suppose we call skipTo(d[5]): since skip point d[3] has a smaller docId, |
| + * and numSkipped+skipInterval == df, the MultiLevelSkipListReader will assume the skip list |
| + * isn't exhausted yet, and will try to load a non-existent skip point. |
| + * |
| + * Therefore, we trim df before passing it to the interface; see trim(int). |
| + * |
| */ |
| final class BlockSkipReader extends MultiLevelSkipListReader { |
| private boolean DEBUG = BlockPostingsReader.DEBUG; |
| + private int skipInterval; |
| + private int blockSize; |
| + private int skipsPerBlock; |
| |
| private long docPointer[]; |
| private long posPointer[]; |
| @@ -47,8 +70,10 @@ |
| private long lastDocPointer; |
| private int lastPosBufferUpto; |
| |
| - public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { |
| + public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { |
| super(skipStream, maxSkipLevels, skipInterval); |
| + this.skipInterval = skipInterval; |
| + this.blockSize = blockSize; |
| docPointer = new long[maxSkipLevels]; |
| if (hasPos) { |
| posPointer = new long[maxSkipLevels]; |
| @@ -73,8 +98,23 @@ |
| } |
| } |
| |
| + |
| + /** |
| + * Trims the original docFreq so that the skip reader reads the proper number of skip points. |
| + * |
| + * Since the definition of a skip point in BlockSkip* differs slightly from MultiLevelSkip*, |
| + * the trimmed docFreq prevents the skip reader from: |
| + * 1. naively reading a non-existent skip point after the last block boundary |
| + * 2. moving into the vInt block |
| + * @return df - 1 when df is a multiple of blockSize; otherwise df rounded down to a multiple of blockSize, plus skipInterval - 1 |
| + */ |
| + protected int trim(int df) { |
| + int blockInts = df/blockSize*blockSize; |
| + return df % blockSize == 0? df - 1: blockInts + skipInterval - 1; |
| + } |
| + |
| public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) { |
| - super.init(skipPointer, df); |
| + super.init(skipPointer, trim(df)); |
| lastDocPointer = docBasePointer; |
| lastPosPointer = posBasePointer; |
| lastPayPointer = payBasePointer; |
| @@ -119,9 +159,6 @@ |
| @Override |
| protected void seekChild(int level) throws IOException { |
| super.seekChild(level); |
| - if (DEBUG) { |
| - System.out.println("seekChild level=" + level); |
| - } |
| docPointer[level] = lastDocPointer; |
| if (posPointer != null) { |
| posPointer[level] = lastPosPointer; |
| @@ -142,16 +179,9 @@ |
| protected void setLastSkipData(int level) { |
| super.setLastSkipData(level); |
| lastDocPointer = docPointer[level]; |
| - if (DEBUG) { |
| - System.out.println("setLastSkipData level=" + level); |
| - System.out.println(" lastDocPointer=" + lastDocPointer); |
| - } |
| if (posPointer != null) { |
| lastPosPointer = posPointer[level]; |
| lastPosBufferUpto = posBufferUpto[level]; |
| - if (DEBUG) { |
| - System.out.println(" lastPosPointer=" + lastPosPointer + " lastPosBUfferUpto=" + lastPosBufferUpto); |
| - } |
| if (payPointer != null) { |
| lastPayPointer = payPointer[level]; |
| } |
| @@ -166,27 +196,12 @@ |
| |
| @Override |
| protected int readSkipData(int level, IndexInput skipStream) throws IOException { |
| - if (DEBUG) { |
| - System.out.println("readSkipData level=" + level); |
| - } |
| int delta = skipStream.readVInt(); |
| - if (DEBUG) { |
| - System.out.println(" delta=" + delta); |
| - } |
| docPointer[level] += skipStream.readVInt(); |
| - if (DEBUG) { |
| - System.out.println(" docFP=" + docPointer[level]); |
| - } |
| |
| if (posPointer != null) { |
| posPointer[level] += skipStream.readVInt(); |
| - if (DEBUG) { |
| - System.out.println(" posFP=" + posPointer[level]); |
| - } |
| posBufferUpto[level] = skipStream.readVInt(); |
| - if (DEBUG) { |
| - System.out.println(" posBufferUpto=" + posBufferUpto[level]); |
| - } |
| |
| if (payloadByteUpto != null) { |
| payloadByteUpto[level] = skipStream.readVInt(); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (working copy) |
| @@ -55,10 +55,12 @@ |
| // nocommit |
| final String segment; |
| |
| - // NOTE: not private to avoid access$NNN methods: |
| - final int blockSize; |
| + // nocommit: is it ok to hard-code these two as constants? |
| + final static int blockSize = BlockPostingsFormat.DEFAULT_BLOCK_SIZE; |
| + final static int skipInterval = BlockPostingsFormat.DEFAULT_SKIP_INTERVAL; |
| + final static int skipsPerBlock = blockSize/skipInterval; |
| |
| - public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix, int blockSize) throws IOException { |
| + public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { |
| boolean success = false; |
| segment = segmentInfo.name; |
| IndexInput docIn = null; |
| @@ -100,7 +102,7 @@ |
| } |
| } |
| |
| - this.blockSize = blockSize; |
| + assert blockSize % skipInterval == 0; |
| } |
| |
| @Override |
| @@ -116,6 +118,24 @@ |
| } |
| } |
| |
| + static void readVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, boolean indexHasFreq) throws IOException { /* Reads num vInt-encoded entries from docIn into docBuffer, and into freqBuffer when indexHasFreq is true. */ |
| + if (indexHasFreq) { /* each code packs a doc value together with a "freq is 1" flag in its low bit */ |
| + for(int i=0;i<num;i++) { |
| + final int code = docIn.readVInt(); |
| + docBuffer[i] = code >>> 1; /* drop the flag bit to recover the doc value */ |
| + if ((code & 1) != 0) { /* low bit set: freq is implicitly 1, nothing more to read */ |
| + freqBuffer[i] = 1; |
| + } else { /* low bit clear: freq follows as its own vInt */ |
| + freqBuffer[i] = docIn.readVInt(); |
| + } |
| + } |
| + } else { /* no freqs indexed: each vInt is the doc value itself; freqBuffer is left untouched */ |
| + for(int i=0;i<num;i++) { |
| + docBuffer[i] = docIn.readVInt(); |
| + } |
| + } |
| + } |
| + |
| static void readBlock(IndexInput in, byte[] encoded, IntBuffer encodedBuffer, int[] buffer) throws IOException { |
| int header = in.readVInt(); |
| in.readBytes(encoded, 0, ForUtil.getEncodedSize(header)); |
| @@ -300,7 +320,8 @@ |
| } |
| } |
| |
| - final class BlockDocsEnum extends DocsEnum { |
| + |
| + final public class BlockDocsEnum extends DocsEnum { |
| private final byte[] encoded; |
| private final IntBuffer encodedBuffer; |
| |
| @@ -309,6 +330,11 @@ |
| |
| private int docBufferUpto; |
| |
| + // How many ints are encoded as ForBlocks |
| + private int blockInts; |
| + |
| + private boolean loadNextBlock; |
| + |
| private BlockSkipReader skipper; |
| private boolean skipped; |
| |
| @@ -343,7 +369,7 @@ |
| indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; |
| indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| indexHasPayloads = fieldInfo.hasPayloads(); |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| } |
| |
| @@ -356,9 +382,6 @@ |
| |
| public DocsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException { |
| this.liveDocs = liveDocs; |
| - if (DEBUG) { |
| - System.out.println(" FPR.reset: seg=" + segment + " termState=" + termState); |
| - } |
| docFreq = termState.docFreq; |
| docTermStartFP = termState.docStartFP; |
| docIn.seek(docTermStartFP); |
| @@ -371,6 +394,8 @@ |
| accum = 0; |
| docUpto = 0; |
| docBufferUpto = blockSize; |
| + blockInts = docFreq/blockSize*blockSize; |
| + loadNextBlock = false; |
| skipped = false; |
| return this; |
| } |
| @@ -385,70 +410,45 @@ |
| return doc; |
| } |
| |
| + |
| + /** |
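| + * Decode a slice of ints into the docDelta (and docTermFreq) buffer. |
| + * |
| + * When fully is set to false, we'll simply skip to the end of the current block |
| + * after partial decoding is done. NOTE(review): refillDocs() declares no "fully" parameter; confirm this sentence is still accurate. |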
| + */ |
| private void refillDocs() throws IOException { |
| final int left = docFreq - docUpto; |
| assert left > 0; |
| |
| - if (left >= blockSize) { |
| - if (DEBUG) { |
| - System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); |
| - } |
| + // We are still reading inside a block; otherwise we |
| + // need to handle the vInt tail that follows these blocks |
| + if (docUpto < blockInts) { |
| + //System.out.println("["+docFreq+"]"+" refillDoc"); |
| readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer); |
| - |
| if (indexHasFreq) { |
| - if (DEBUG) { |
| - System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); |
| - } |
| readBlock(docIn, encoded, encodedBuffer, freqBuffer); |
| } |
| } else { |
| - // Read vInts: |
| - if (DEBUG) { |
| - System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer()); |
| - } |
| - for(int i=0;i<left;i++) { |
| - final int code = docIn.readVInt(); |
| - if (indexHasFreq) { |
| - docDeltaBuffer[i] = code >>> 1; |
| - if ((code & 1) != 0) { |
| - freqBuffer[i] = 1; |
| - } else { |
| - freqBuffer[i] = docIn.readVInt(); |
| - } |
| - } else { |
| - docDeltaBuffer[i] = code; |
| - } |
| - } |
| + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq); |
| } |
| - docBufferUpto = 0; |
| + if (loadNextBlock) { |
| + loadNextBlock = false; |
| + } else { |
| + docBufferUpto = 0; |
| + } |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| - |
| - if (DEBUG) { |
| - System.out.println("\nFPR.nextDoc"); |
| - } |
| - |
| while (true) { |
| - if (DEBUG) { |
| - System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto); |
| - } |
| - |
| if (docUpto == docFreq) { |
| - if (DEBUG) { |
| - System.out.println(" return doc=END"); |
| - } |
| return doc = NO_MORE_DOCS; |
| } |
| - |
| - if (docBufferUpto == blockSize) { |
| + //System.out.println("["+docFreq+"]"+" nextDoc"); |
| + if (loadNextBlock || docBufferUpto == blockSize) { |
| refillDocs(); |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); |
| - } |
| accum += docDeltaBuffer[docBufferUpto]; |
| docUpto++; |
| |
| @@ -456,20 +456,12 @@ |
| doc = accum; |
| freq = freqBuffer[docBufferUpto]; |
| docBufferUpto++; |
| - if (DEBUG) { |
| - System.out.println(" return doc=" + doc + " freq=" + freq); |
| - } |
| return doc; |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" doc=" + accum + " is deleted; try next doc"); |
| - } |
| - |
| docBufferUpto++; |
| } |
| } |
| - |
| + |
| @Override |
| public int advance(int target) throws IOException { |
| // nocommit make frq block load lazy/skippable |
| @@ -478,16 +470,18 @@ |
| // nocommit put cheating back! does it help? |
| // nocommit use skipper!!! it has next last doc id!! |
| //if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) { |
| - if (docFreq > blockSize && target - accum > blockSize) { |
| |
| - if (DEBUG) { |
| - System.out.println("load skipper"); |
| - } |
| + // We allow the skipper to run iff: |
| + // 1. We have enough ints to use partial decode |
| + // 2. The target is not within one skip |
| + // 3. We won't skip into vInt block |
| + if (docFreq > blockSize && target - accum > skipInterval && accum + skipInterval <= blockInts) { |
| |
| if (skipper == null) { |
| // Lazy init: first time this enum has ever been used for skipping |
| skipper = new BlockSkipReader((IndexInput) docIn.clone(), |
| BlockPostingsWriter.maxSkipLevels, |
| + skipInterval, |
| blockSize, |
| indexHasPos, |
| indexHasOffsets, |
| @@ -498,6 +492,14 @@ |
| assert skipOffset != -1; |
| // This is the first time this enum has skipped |
| // since reset() was called; load the skip data: |
| + // |
| + // We don't always use the real docFreq, to prevent the skipper |
| + // from |
| + // 1. naively reading a non-existent skip point after the last block boundary |
| + // 2. moving into the vInt block |
| + // |
| + // See BlockSkipReader for detailed explanation |
| + // |
| skipper.init(docTermStartFP+skipOffset, docTermStartFP, 0, 0, docFreq); |
| skipped = true; |
| } |
| @@ -506,35 +508,23 @@ |
| |
| if (newDocUpto > docUpto) { |
| // Skipper moved |
| - |
| - if (DEBUG) { |
| - System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer()); |
| - } |
| - |
| - assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto; |
| + assert newDocUpto % skipInterval == (skipInterval-1): "got " + newDocUpto; |
| docUpto = newDocUpto+1; |
| |
| - // Force block read next: |
| - docBufferUpto = blockSize; |
| - accum = skipper.getDoc(); |
| - docIn.seek(skipper.getDocPointer()); |
| + // Force to read next slice of block |
| + docBufferUpto = docUpto % blockSize; |
| + loadNextBlock = true; |
| + accum = skipper.getDoc(); // actually, this is just lastSkipEntry |
| + docIn.seek(skipper.getDocPointer()); // now point to the block we want to search |
| } |
| } |
| |
| // Now scan: |
| while (nextDoc() != NO_MORE_DOCS) { |
| if (doc >= target) { |
| - if (DEBUG) { |
| - System.out.println(" advance return doc=" + doc); |
| - } |
| return doc; |
| } |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" advance return doc=END"); |
| - } |
| - |
| return NO_MORE_DOCS; |
| } |
| } |
| @@ -552,6 +542,9 @@ |
| private int docBufferUpto; |
| private int posBufferUpto; |
| |
| + private int blockInts; |
| + private boolean loadNextBlock; |
| + |
| private BlockSkipReader skipper; |
| private boolean skipped; |
| |
| @@ -604,7 +597,7 @@ |
| this.startDocIn = BlockPostingsReader.this.docIn; |
| this.docIn = (IndexInput) startDocIn.clone(); |
| this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone(); |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| indexHasPayloads = fieldInfo.hasPayloads(); |
| @@ -618,9 +611,6 @@ |
| |
| public DocsAndPositionsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException { |
| this.liveDocs = liveDocs; |
| - if (DEBUG) { |
| - System.out.println(" FPR.reset: termState=" + termState); |
| - } |
| docFreq = termState.docFreq; |
| docTermStartFP = termState.docStartFP; |
| posTermStartFP = termState.posStartFP; |
| @@ -641,6 +631,8 @@ |
| accum = 0; |
| docUpto = 0; |
| docBufferUpto = blockSize; |
| + blockInts = docFreq/blockSize*blockSize; |
| + loadNextBlock = false; |
| skipped = false; |
| return this; |
| } |
| @@ -658,45 +650,22 @@ |
| private void refillDocs() throws IOException { |
| final int left = docFreq - docUpto; |
| assert left > 0; |
| - |
| - if (left >= blockSize) { |
| - if (DEBUG) { |
| - System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); |
| - } |
| - |
| + if (docUpto < blockInts) { |
| + //System.out.println("["+docFreq+"]"+" refillDoc"); |
| readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer); |
| - |
| - if (DEBUG) { |
| - System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); |
| - } |
| - |
| readBlock(docIn, encoded, encodedBuffer, freqBuffer); |
| } else { |
| - // Read vInts: |
| - if (DEBUG) { |
| - System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer()); |
| - } |
| - for(int i=0;i<left;i++) { |
| - final int code = docIn.readVInt(); |
| - docDeltaBuffer[i] = code >>> 1; |
| - if ((code & 1) != 0) { |
| - freqBuffer[i] = 1; |
| - } else { |
| - freqBuffer[i] = docIn.readVInt(); |
| - } |
| - } |
| + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true); |
| } |
| - docBufferUpto = 0; |
| + if (loadNextBlock) { |
| + loadNextBlock = false; |
| + } else { |
| + docBufferUpto = 0; |
| + } |
| } |
| |
| private void refillPositions() throws IOException { |
| - if (DEBUG) { |
| - System.out.println(" refillPositions"); |
| - } |
| if (posIn.getFilePointer() == lastPosBlockFP) { |
| - if (DEBUG) { |
| - System.out.println(" vInt pos block @ fp=" + posIn.getFilePointer() + " hasPayloads=" + indexHasPayloads + " hasOffsets=" + indexHasOffsets); |
| - } |
| final int count = posIn.readVInt(); |
| int payloadLength = 0; |
| for(int i=0;i<count;i++) { |
| @@ -712,43 +681,26 @@ |
| } else { |
| posDeltaBuffer[i] = code; |
| } |
| - |
| if (indexHasOffsets) { |
| posIn.readVInt(); |
| posIn.readVInt(); |
| } |
| } |
| } else { |
| - if (DEBUG) { |
| - System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer()); |
| - } |
| readBlock(posIn, encoded, encodedBuffer, posDeltaBuffer); |
| } |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| - |
| - if (DEBUG) { |
| - System.out.println(" FPR.nextDoc"); |
| - } |
| - |
| while (true) { |
| - if (DEBUG) { |
| - System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto); |
| - } |
| - |
| if (docUpto == docFreq) { |
| return doc = NO_MORE_DOCS; |
| } |
| - |
| - if (docBufferUpto == blockSize) { |
| + //System.out.println("["+docFreq+"]"+" nextDoc"); |
| + if (loadNextBlock || docBufferUpto == blockSize) { |
| refillDocs(); |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); |
| - } |
| accum += docDeltaBuffer[docBufferUpto]; |
| freq = freqBuffer[docBufferUpto]; |
| posPendingCount += freq; |
| @@ -757,43 +709,27 @@ |
| |
| if (liveDocs == null || liveDocs.get(accum)) { |
| doc = accum; |
| - if (DEBUG) { |
| - System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount); |
| - } |
| position = 0; |
| return doc; |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" doc=" + accum + " is deleted; try next doc"); |
| - } |
| } |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| // nocommit make frq block load lazy/skippable |
| - if (DEBUG) { |
| - System.out.println(" FPR.advance target=" + target); |
| - } |
| |
| // nocommit 2 is heuristic guess!! |
| // nocommit put cheating back! does it help? |
| // nocommit use skipper!!! it has next last doc id!! |
| //if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) { |
| - if (docFreq > blockSize && target - accum > blockSize) { |
| + if (docFreq > blockSize && target - accum > skipInterval && accum + skipInterval <= blockInts) { |
| |
| - if (DEBUG) { |
| - System.out.println(" try skipper"); |
| - } |
| - |
| if (skipper == null) { |
| // Lazy init: first time this enum has ever been used for skipping |
| - if (DEBUG) { |
| - System.out.println(" create skipper"); |
| - } |
| skipper = new BlockSkipReader((IndexInput) docIn.clone(), |
| BlockPostingsWriter.maxSkipLevels, |
| + skipInterval, |
| blockSize, |
| true, |
| indexHasOffsets, |
| @@ -804,9 +740,6 @@ |
| assert skipOffset != -1; |
| // This is the first time this enum has skipped |
| // since reset() was called; load the skip data: |
| - if (DEBUG) { |
| - System.out.println(" init skipper"); |
| - } |
| skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); |
| skipped = true; |
| } |
| @@ -815,16 +748,11 @@ |
| |
| if (newDocUpto > docUpto) { |
| // Skipper moved |
| - |
| - if (DEBUG) { |
| - System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto()); |
| - } |
| - |
| - assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto; |
| + assert newDocUpto % skipInterval == (skipInterval-1): "got " + newDocUpto; |
| docUpto = newDocUpto+1; |
| |
| - // Force block read next: |
| - docBufferUpto = blockSize; |
| + docBufferUpto = docUpto % blockSize; |
| + loadNextBlock = true; |
| accum = skipper.getDoc(); |
| docIn.seek(skipper.getDocPointer()); |
| posPendingFP = skipper.getPosPointer(); |
| @@ -835,16 +763,10 @@ |
| // Now scan: |
| while (nextDoc() != NO_MORE_DOCS) { |
| if (doc >= target) { |
| - if (DEBUG) { |
| - System.out.println(" advance return doc=" + doc); |
| - } |
| return doc; |
| } |
| } |
| |
| - if (DEBUG) { |
| - System.out.println(" advance return doc=END"); |
| - } |
| |
| return NO_MORE_DOCS; |
| } |
| @@ -856,31 +778,19 @@ |
| private void skipPositions() throws IOException { |
| // Skip positions now: |
| int toSkip = posPendingCount - freq; |
| - if (DEBUG) { |
| - System.out.println(" FPR.skipPositions: toSkip=" + toSkip); |
| - } |
| |
| final int leftInBlock = blockSize - posBufferUpto; |
| if (toSkip < leftInBlock) { |
| posBufferUpto += toSkip; |
| - if (DEBUG) { |
| - System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto); |
| - } |
| } else { |
| toSkip -= leftInBlock; |
| while(toSkip >= blockSize) { |
| - if (DEBUG) { |
| - System.out.println(" skip whole block @ fp=" + posIn.getFilePointer()); |
| - } |
| assert posIn.getFilePointer() != lastPosBlockFP; |
| skipBlock(posIn); |
| toSkip -= blockSize; |
| } |
| refillPositions(); |
| posBufferUpto = toSkip; |
| - if (DEBUG) { |
| - System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto); |
| - } |
| } |
| |
| position = 0; |
| @@ -888,13 +798,7 @@ |
| |
| @Override |
| public int nextPosition() throws IOException { |
| - if (DEBUG) { |
| - System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto); |
| - } |
| if (posPendingFP != -1) { |
| - if (DEBUG) { |
| - System.out.println(" seek to pendingFP=" + posPendingFP); |
| - } |
| posIn.seek(posPendingFP); |
| posPendingFP = -1; |
| |
| @@ -913,9 +817,6 @@ |
| } |
| position += posDeltaBuffer[posBufferUpto++]; |
| posPendingCount--; |
| - if (DEBUG) { |
| - System.out.println(" return pos=" + position); |
| - } |
| return position; |
| } |
| |
| @@ -965,6 +866,9 @@ |
| private int docBufferUpto; |
| private int posBufferUpto; |
| |
| + private int blockInts; |
| + private boolean loadNextBlock; |
| + |
| private BlockSkipReader skipper; |
| private boolean skipped; |
| |
| @@ -1024,7 +928,7 @@ |
| this.docIn = (IndexInput) startDocIn.clone(); |
| this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone(); |
| this.payIn = (IndexInput) BlockPostingsReader.this.payIn.clone(); |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| if (indexHasOffsets) { |
| @@ -1057,9 +961,6 @@ |
| |
| public EverythingEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException { |
| this.liveDocs = liveDocs; |
| - if (DEBUG) { |
| - System.out.println(" FPR.reset: termState=" + termState); |
| - } |
| docFreq = termState.docFreq; |
| docTermStartFP = termState.docStartFP; |
| posTermStartFP = termState.posStartFP; |
| @@ -1081,6 +982,8 @@ |
| accum = 0; |
| docUpto = 0; |
| docBufferUpto = blockSize; |
| + blockInts = docFreq/blockSize*blockSize; |
| + loadNextBlock = false; |
| skipped = false; |
| return this; |
| } |
| @@ -1099,44 +1002,22 @@ |
| final int left = docFreq - docUpto; |
| assert left > 0; |
| |
| - if (left >= blockSize) { |
| - if (DEBUG) { |
| - System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); |
| - } |
| - |
| + if (docUpto < blockInts) { |
| + //System.out.println("["+docFreq+"]"+" refillDoc"); |
| readBlock(docIn, encoded, encodedBuffer, docDeltaBuffer); |
| - |
| - if (DEBUG) { |
| - System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); |
| - } |
| - |
| readBlock(docIn, encoded, encodedBuffer, freqBuffer); |
| } else { |
| - // Read vInts: |
| - if (DEBUG) { |
| - System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer()); |
| - } |
| - for(int i=0;i<left;i++) { |
| - final int code = docIn.readVInt(); |
| - docDeltaBuffer[i] = code >>> 1; |
| - if ((code & 1) != 0) { |
| - freqBuffer[i] = 1; |
| - } else { |
| - freqBuffer[i] = docIn.readVInt(); |
| - } |
| - } |
| + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true); |
| } |
| - docBufferUpto = 0; |
| + if (loadNextBlock) { |
| + loadNextBlock = false; |
| + } else { |
| + docBufferUpto = 0; |
| + } |
| } |
| |
| private void refillPositions() throws IOException { |
| - if (DEBUG) { |
| - System.out.println(" refillPositions"); |
| - } |
| if (posIn.getFilePointer() == lastPosBlockFP) { |
| - if (DEBUG) { |
| - System.out.println(" vInt pos block @ fp=" + posIn.getFilePointer() + " hasPayloads=" + indexHasPayloads + " hasOffsets=" + indexHasOffsets); |
| - } |
| final int count = posIn.readVInt(); |
| int payloadLength = 0; |
| payloadByteUpto = 0; |
| @@ -1146,9 +1027,6 @@ |
| if ((code & 1) != 0) { |
| payloadLength = posIn.readVInt(); |
| } |
| - if (DEBUG) { |
| - System.out.println(" i=" + i + " payloadLen=" + payloadLength); |
| - } |
| payloadLengthBuffer[i] = payloadLength; |
| posDeltaBuffer[i] = code >>> 1; |
| if (payloadLength != 0) { |
| @@ -1164,32 +1042,17 @@ |
| } |
| |
| if (indexHasOffsets) { |
| - if (DEBUG) { |
| - System.out.println(" i=" + i + " read offsets from posIn.fp=" + posIn.getFilePointer()); |
| - } |
| offsetStartDeltaBuffer[i] = posIn.readVInt(); |
| offsetLengthBuffer[i] = posIn.readVInt(); |
| - if (DEBUG) { |
| - System.out.println(" startOffDelta=" + offsetStartDeltaBuffer[i] + " offsetLen=" + offsetLengthBuffer[i]); |
| - } |
| } |
| } |
| payloadByteUpto = 0; |
| } else { |
| - if (DEBUG) { |
| - System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer()); |
| - } |
| readBlock(posIn, encoded, encodedBuffer, posDeltaBuffer); |
| |
| if (indexHasPayloads) { |
| - if (DEBUG) { |
| - System.out.println(" bulk payload block @ pay.fp=" + payIn.getFilePointer()); |
| - } |
| readBlock(payIn, encoded, encodedBuffer, payloadLengthBuffer); |
| int numBytes = payIn.readVInt(); |
| - if (DEBUG) { |
| - System.out.println(" " + numBytes + " payload bytes @ pay.fp=" + payIn.getFilePointer()); |
| - } |
| if (numBytes > payloadBytes.length) { |
| payloadBytes = ArrayUtil.grow(payloadBytes, numBytes); |
| } |
| @@ -1198,9 +1061,6 @@ |
| } |
| |
| if (indexHasOffsets) { |
| - if (DEBUG) { |
| - System.out.println(" bulk offset block @ pay.fp=" + payIn.getFilePointer()); |
| - } |
| readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaBuffer); |
| readBlock(payIn, encoded, encodedBuffer, offsetLengthBuffer); |
| } |
| @@ -1209,32 +1069,18 @@ |
| |
| @Override |
| public int nextDoc() throws IOException { |
| - |
| - if (DEBUG) { |
| - System.out.println(" FPR.nextDoc"); |
| - } |
| - |
| if (indexHasPayloads) { |
| payloadByteUpto += payloadLength; |
| payloadLength = 0; |
| } |
| - |
| while (true) { |
| - if (DEBUG) { |
| - System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto); |
| - } |
| - |
| if (docUpto == docFreq) { |
| return doc = NO_MORE_DOCS; |
| } |
| - |
| - if (docBufferUpto == blockSize) { |
| + //System.out.println("["+docFreq+"]"+" nextDoc"); |
| + if (loadNextBlock || docBufferUpto == blockSize) { |
| refillDocs(); |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); |
| - } |
| accum += docDeltaBuffer[docBufferUpto]; |
| freq = freqBuffer[docBufferUpto]; |
| posPendingCount += freq; |
| @@ -1243,45 +1089,30 @@ |
| |
| if (liveDocs == null || liveDocs.get(accum)) { |
| doc = accum; |
| - if (DEBUG) { |
| - System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount); |
| - } |
| position = 0; |
| payloadLength = 0; |
| lastStartOffset = 0; |
| return doc; |
| } |
| - |
| - if (DEBUG) { |
| - System.out.println(" doc=" + accum + " is deleted; try next doc"); |
| - } |
| } |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| // nocommit make frq block load lazy/skippable |
| - if (DEBUG) { |
| - System.out.println(" FPR.advance target=" + target); |
| - } |
| |
| // nocommit 2 is heuristic guess!! |
| // nocommit put cheating back! does it help? |
| // nocommit use skipper!!! it has next last doc id!! |
| //if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) { |
| - if (docFreq > blockSize && target - accum > blockSize) { |
| + if (docFreq > blockSize && target - accum > skipInterval && accum + skipInterval <= blockInts) { |
| |
| - if (DEBUG) { |
| - System.out.println(" try skipper"); |
| - } |
| |
| if (skipper == null) { |
| // Lazy init: first time this enum has ever been used for skipping |
| - if (DEBUG) { |
| - System.out.println(" create skipper"); |
| - } |
| skipper = new BlockSkipReader((IndexInput) docIn.clone(), |
| BlockPostingsWriter.maxSkipLevels, |
| + skipInterval, |
| blockSize, |
| true, |
| indexHasOffsets, |
| @@ -1292,9 +1123,6 @@ |
| assert skipOffset != -1; |
| // This is the first time this enum has skipped |
| // since reset() was called; load the skip data: |
| - if (DEBUG) { |
| - System.out.println(" init skipper"); |
| - } |
| skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); |
| skipped = true; |
| } |
| @@ -1303,16 +1131,11 @@ |
| |
| if (newDocUpto > docUpto) { |
| // Skipper moved |
| - |
| - if (DEBUG) { |
| - System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset); |
| - } |
| - |
| - assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto; |
| + assert newDocUpto % skipInterval == (skipInterval-1): "got " + newDocUpto; |
| docUpto = newDocUpto+1; |
| |
| - // Force block read next: |
| - docBufferUpto = blockSize; |
| + docBufferUpto = docUpto % blockSize; |
| + loadNextBlock = true; |
| accum = skipper.getDoc(); |
| docIn.seek(skipper.getDocPointer()); |
| posPendingFP = skipper.getPosPointer(); |
| @@ -1326,16 +1149,10 @@ |
| // Now scan: |
| while (nextDoc() != NO_MORE_DOCS) { |
| if (doc >= target) { |
| - if (DEBUG) { |
| - System.out.println(" advance return doc=" + doc); |
| - } |
| return doc; |
| } |
| } |
| |
| - if (DEBUG) { |
| - System.out.println(" advance return doc=END"); |
| - } |
| |
| return NO_MORE_DOCS; |
| } |
| @@ -1347,9 +1164,6 @@ |
| private void skipPositions() throws IOException { |
| // Skip positions now: |
| int toSkip = posPendingCount - freq; |
| - if (DEBUG) { |
| - System.out.println(" FPR.skipPositions: toSkip=" + toSkip); |
| - } |
| |
| final int leftInBlock = blockSize - posBufferUpto; |
| if (toSkip < leftInBlock) { |
| @@ -1363,15 +1177,9 @@ |
| } |
| posBufferUpto++; |
| } |
| - if (DEBUG) { |
| - System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto); |
| - } |
| } else { |
| toSkip -= leftInBlock; |
| while(toSkip >= blockSize) { |
| - if (DEBUG) { |
| - System.out.println(" skip whole block @ fp=" + posIn.getFilePointer()); |
| - } |
| assert posIn.getFilePointer() != lastPosBlockFP; |
| skipBlock(posIn); |
| |
| @@ -1407,9 +1215,6 @@ |
| } |
| posBufferUpto++; |
| } |
| - if (DEBUG) { |
| - System.out.println(" skip w/in block to posBufferUpto=" + posBufferUpto); |
| - } |
| } |
| |
| position = 0; |
| @@ -1421,20 +1226,11 @@ |
| |
| @Override |
| public int nextPosition() throws IOException { |
| - if (DEBUG) { |
| - System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto + " payloadByteUpto=" + payloadByteUpto); |
| - } |
| if (posPendingFP != -1) { |
| - if (DEBUG) { |
| - System.out.println(" seek pos to pendingFP=" + posPendingFP); |
| - } |
| posIn.seek(posPendingFP); |
| posPendingFP = -1; |
| |
| if (payPendingFP != -1) { |
| - if (DEBUG) { |
| - System.out.println(" seek pay to pendingFP=" + payPendingFP); |
| - } |
| payIn.seek(payPendingFP); |
| payPendingFP = -1; |
| } |
| @@ -1444,11 +1240,6 @@ |
| } |
| |
| if (indexHasPayloads) { |
| - if (DEBUG) { |
| - if (payloadLength != 0) { |
| - System.out.println(" skip unread payload length=" + payloadLength); |
| - } |
| - } |
| payloadByteUpto += payloadLength; |
| payloadLength = 0; |
| } |
| @@ -1476,9 +1267,6 @@ |
| |
| posBufferUpto++; |
| posPendingCount--; |
| - if (DEBUG) { |
| - System.out.println(" return pos=" + position); |
| - } |
| return position; |
| } |
| |
| @@ -1499,9 +1287,6 @@ |
| |
| @Override |
| public BytesRef getPayload() { |
| - if (DEBUG) { |
| - System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto); |
| - } |
| payload.bytes = payloadBytes; |
| payload.offset = payloadByteUpto; |
| payload.length = payloadLength; |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py (working copy) |
| @@ -75,18 +75,18 @@ |
| w("final class PackedIntsDecompress {\n") |
| |
| w('\n // nocommit: assess perf of this to see if specializing is really needed\n') |
| - w('\n // NOTE: hardwired to blockSize == 128\n\n') |
| + w('\n // NOTE: hardwired to blockSize == 32*k \n\n') |
| |
| - w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output) {\n') |
| - w(' Arrays.fill(output, compressedBuffer.get());\n') |
| + w(' public static void decode0(final IntBuffer compressedBuffer, final int[] output, int iteration) {\n') |
| + w(' Arrays.fill(output, 0, iteration*32, compressedBuffer.get());\n') |
| w(' }\n') |
| |
| for numFrameBits in xrange(1, 33): |
| - w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits) |
| + w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output, int iteration) {\n' % numFrameBits) |
| w(' final int numFrameBits = %d;\n' % numFrameBits) |
| w(' final int mask = (int) ((1L<<numFrameBits) - 1);\n') |
| w(' int outputOffset = 0;\n') |
| - w(' for(int step=0;step<4;step++) {\n') |
| + w(' for(int step=0;step<iteration;step++) {\n') |
| |
| for i in range(numFrameBits): # declare int vars and init from buffer |
| w(" int intValue" + str(i) + " = compressedBuffer.get();\n") |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/PackedIntsDecompress.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/PackedIntsDecompress.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/PackedIntsDecompress.java (working copy) |
| @@ -25,16 +25,16 @@ |
| |
| // nocommit: assess perf of this to see if specializing is really needed |
| |
| - // NOTE: hardwired to blockSize == 128 |
| + // NOTE: hardwired to blockSize == 32*k |
| |
| - public static void decode0(final IntBuffer compressedBuffer, final int[] output) { |
| - Arrays.fill(output, compressedBuffer.get()); |
| + public static void decode0(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| + Arrays.fill(output, 0, iteration*32, compressedBuffer.get()); |
| } |
| - public static void decode1(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode1(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 1; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| output[0 + outputOffset] = intValue0 & mask; |
| output[1 + outputOffset] = (intValue0 >>> 1) & mask; |
| @@ -71,11 +71,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode2(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode2(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 2; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| output[0 + outputOffset] = intValue0 & mask; |
| @@ -113,11 +113,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode3(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode3(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 3; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -156,11 +156,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode4(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode4(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 4; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -200,11 +200,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode5(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode5(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 5; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -245,11 +245,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode6(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode6(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 6; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -291,11 +291,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode7(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode7(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 7; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -338,11 +338,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode8(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode8(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 8; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -386,11 +386,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode9(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode9(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 9; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -435,11 +435,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode10(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode10(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 10; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -485,11 +485,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode11(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode11(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 11; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -536,11 +536,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode12(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode12(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 12; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -588,11 +588,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode13(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode13(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 13; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -641,11 +641,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode14(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode14(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 14; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -695,11 +695,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode15(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode15(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 15; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -750,11 +750,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode16(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode16(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 16; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -806,11 +806,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode17(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode17(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 17; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -863,11 +863,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode18(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode18(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 18; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -921,11 +921,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode19(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode19(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 19; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -980,11 +980,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode20(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode20(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 20; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1040,11 +1040,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode21(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode21(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 21; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1101,11 +1101,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode22(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode22(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 22; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1163,11 +1163,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode23(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode23(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 23; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1226,11 +1226,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode24(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode24(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 24; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1290,11 +1290,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode25(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode25(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 25; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1355,11 +1355,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode26(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode26(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 26; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1421,11 +1421,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode27(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode27(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 27; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1488,11 +1488,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode28(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode28(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 28; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1556,11 +1556,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode29(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode29(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 29; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1625,11 +1625,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode30(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode30(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 30; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1695,11 +1695,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode31(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode31(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 31; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| @@ -1766,11 +1766,11 @@ |
| outputOffset += 32; |
| } |
| } |
| - public static void decode32(final IntBuffer compressedBuffer, final int[] output) { |
| + public static void decode32(final IntBuffer compressedBuffer, final int[] output, int iteration) { |
| final int numFrameBits = 32; |
| final int mask = (int) ((1L<<numFrameBits) - 1); |
| int outputOffset = 0; |
| - for(int step=0;step<4;step++) { |
| + for(int step=0;step<iteration;step++) { |
| int intValue0 = compressedBuffer.get(); |
| int intValue1 = compressedBuffer.get(); |
| int intValue2 = compressedBuffer.get(); |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (working copy) |
| @@ -60,9 +60,11 @@ |
| final IndexOutput posOut; |
| final IndexOutput payOut; |
| |
| - static final int DEFAULT_BLOCK_SIZE = 128; |
| + final static int blockSize = BlockPostingsFormat.DEFAULT_BLOCK_SIZE; |
| + final static int skipInterval = BlockPostingsFormat.DEFAULT_SKIP_INTERVAL; |
| |
| - final int blockSize; |
| + // number of skip intervals (slices) contained in one block |
| + final static int skipsPerBlock = blockSize/skipInterval; |
| |
| private IndexOutput termsOut; |
| |
| @@ -90,13 +92,13 @@ |
| private byte[] payloadBytes; |
| private int payloadByteUpto; |
| |
| - private int lastBlockDocID; |
| - private boolean saveNextPosBlock; |
| - private long lastBlockPosFP; |
| - private long lastBlockPayFP; |
| - private int lastBlockPosBufferUpto; |
| - private int lastBlockStartOffset; |
| - private int lastBlockPayloadByteUpto; |
| + final int[] lastBlockDocIDs; |
| + private long[] lastBlockPosFPs; |
| + private long[] lastBlockPayFPs; |
| + private int[] lastBlockPosBufferUptos; |
| + private int[] lastBlockStartOffsets; |
| + private int[] lastBlockPayloadByteUptos; |
| + |
| private int lastDocID; |
| private int lastPosition; |
| private int lastStartOffset; |
| @@ -107,9 +109,8 @@ |
| |
| private final BlockSkipWriter skipWriter; |
| |
| - public BlockPostingsWriter(SegmentWriteState state, int blockSize) throws IOException { |
| + public BlockPostingsWriter(SegmentWriteState state) throws IOException { |
| super(); |
| - this.blockSize = blockSize; |
| |
| docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.DOC_EXTENSION), |
| state.context); |
| @@ -164,14 +165,22 @@ |
| docDeltaBuffer = new int[blockSize]; |
| freqBuffer = new int[blockSize]; |
| |
| - skipWriter = new BlockSkipWriter(blockSize, |
| - maxSkipLevels, |
| + lastBlockDocIDs = new int[skipsPerBlock]; |
| + lastBlockPosFPs = new long[skipsPerBlock]; |
| + lastBlockPayFPs = new long[skipsPerBlock]; |
| + lastBlockPosBufferUptos = new int[skipsPerBlock]; |
| + lastBlockStartOffsets = new int[skipsPerBlock]; |
| + lastBlockPayloadByteUptos = new int[skipsPerBlock]; |
| + |
| + skipWriter = new BlockSkipWriter(maxSkipLevels, |
| + skipInterval, |
| + blockSize, |
| state.segmentInfo.getDocCount(), |
| docOut, |
| posOut, |
| payOut); |
| |
| - encoded = new byte[blockSize*4 + 4]; |
| + encoded = new byte[blockSize*4]; |
| encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); |
| } |
| |
| @@ -201,7 +210,7 @@ |
| payTermStartFP = payOut.getFilePointer(); |
| } |
| } |
| - lastBlockDocID = -1; |
| + lastBlockDocIDs[0] = -1; |
| lastDocID = 0; |
| if (DEBUG) { |
| System.out.println("FPW.startTerm startFP=" + docTermStartFP); |
| @@ -211,7 +220,6 @@ |
| |
| private void writeBlock(int[] buffer, IndexOutput out) throws IOException { |
| final int header = ForUtil.compress(buffer, encodedBuffer); |
| - //System.out.println(" block has " + numBytes + " bytes"); |
| out.writeVInt(header); |
| out.writeBytes(encoded, ForUtil.getEncodedSize(header)); |
| } |
| @@ -219,74 +227,24 @@ |
| @Override |
| public void startDoc(int docID, int termDocFreq) throws IOException { |
| if (DEBUG) { |
| - System.out.println("FPW.startDoc docID=" + docID); |
| + System.out.println("FPW.startDoc docID["+docBufferUpto+"]=" + docID); |
| } |
| + final int docDelta = docID - lastDocID; |
| |
| - // nocommit do this in finishDoc... but does it fail...? |
| - // is it not always called...? |
| - if (posOut != null && saveNextPosBlock) { |
| - lastBlockPosFP = posOut.getFilePointer(); |
| - if (payOut != null) { |
| - lastBlockPayFP = payOut.getFilePointer(); |
| - } |
| - lastBlockPosBufferUpto = posBufferUpto; |
| - lastBlockStartOffset = lastStartOffset; |
| - lastBlockPayloadByteUpto = payloadByteUpto; |
| - saveNextPosBlock = false; |
| - if (DEBUG) { |
| - System.out.println(" now save lastBlockPosFP=" + lastBlockPosFP + " lastBlockPosBufferUpto=" + lastBlockPosBufferUpto + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUpto); |
| - } |
| - } |
| - |
| - final int docDelta = docID - lastDocID; |
| if (docID < 0 || (docCount > 0 && docDelta <= 0)) { |
| throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")"); |
| } |
| - lastDocID = docID; |
| |
| docDeltaBuffer[docBufferUpto] = docDelta; |
| - if (DEBUG) { |
| - System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta); |
| - } |
| +// if (DEBUG) { |
| +// System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta); |
| +// } |
| if (fieldHasFreqs) { |
| freqBuffer[docBufferUpto] = termDocFreq; |
| } |
| - |
| docBufferUpto++; |
| docCount++; |
| - |
| - if (docBufferUpto == blockSize) { |
| - // nocommit maybe instead of buffering skip before |
| - // writing a block based on last block's end data |
| - // ... we could buffer after writing the block? only |
| - // iffiness with that approach is it could be a |
| - // pointlness skip? like we may stop adding docs |
| - // right after that, then we have skip point AFTER |
| - // last doc. the thing is, in finishTerm we are |
| - // already sometimes adding a skip point AFTER the |
| - // last doc? |
| - if (lastBlockDocID != -1) { |
| - if (DEBUG) { |
| - System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-blockSize)); |
| - } |
| - skipWriter.bufferSkip(lastBlockDocID, docCount-blockSize, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto); |
| - } |
| - lastBlockDocID = docID; |
| - saveNextPosBlock = true; |
| - |
| - if (DEBUG) { |
| - System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer()); |
| - } |
| - writeBlock(docDeltaBuffer, docOut); |
| - if (fieldHasFreqs) { |
| - if (DEBUG) { |
| - System.out.println(" write freq block @ fp=" + docOut.getFilePointer()); |
| - } |
| - writeBlock(freqBuffer, docOut); |
| - } |
| - docBufferUpto = 0; |
| - } |
| - |
| + lastDocID = docID; |
| lastPosition = 0; |
| lastStartOffset = 0; |
| } |
| @@ -343,7 +301,46 @@ |
| } |
| |
| @Override |
| - public void finishDoc() { |
| + public void finishDoc() throws IOException { |
| + // A full block of docs has been buffered: buffer its skip data, then |
| + // write out the doc delta block (and the freq block, if any) |
| + if (docBufferUpto == blockSize) { |
| + skipWriter.bufferSkip(lastBlockDocIDs, docCount-blockSize, lastBlockPosFPs, lastBlockPayFPs, lastBlockPosBufferUptos, lastBlockStartOffsets, lastBlockPayloadByteUptos); |
| + if (DEBUG) { |
| + System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocIDs[0] + " docCount=" + (docCount-blockSize)); |
| + } |
| + if (DEBUG) { |
| + System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer()); |
| + } |
| + writeBlock(docDeltaBuffer, docOut); |
| + if (fieldHasFreqs) { |
| + if (DEBUG) { |
| + System.out.println(" write freq block @ fp=" + docOut.getFilePointer()); |
| + } |
| + writeBlock(freqBuffer, docOut); |
| + } |
| + docBufferUpto = 0; |
| + } |
| + |
| + // Since we don't know the df for the current term, we have to buffer |
| + // the skip data for each block; once a full block of docs has been |
| + // collected, it is written to the skip file. |
| + int slice = (docBufferUpto)/skipInterval; |
| + if (docBufferUpto % skipInterval == 0) { |
| + lastBlockDocIDs[slice] = lastDocID; |
| + if (posOut != null) { |
| + if (payOut != null) { |
| + lastBlockPayFPs[slice] = payOut.getFilePointer(); |
| + } |
| + lastBlockPosFPs[slice] = posOut.getFilePointer(); |
| + lastBlockPosBufferUptos[slice] = posBufferUpto; |
| + lastBlockStartOffsets[slice] = lastStartOffset; |
| + lastBlockPayloadByteUptos[slice] = payloadByteUpto; |
| + } |
| + if (DEBUG) { |
| + System.out.println(" docBufferUpto="+docBufferUpto+" now get lastBlockDocID="+lastBlockDocIDs[slice]+" lastBlockPosFP=" + lastBlockPosFPs[slice] + " lastBlockPosBufferUpto=" + lastBlockPosBufferUptos[slice] + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUptos[slice]); |
| + } |
| + } |
| } |
| |
| private static class PendingTerm { |
| @@ -367,7 +364,6 @@ |
| /** Called when we are done adding docs to this term */ |
| @Override |
| public void finishTerm(TermStats stats) throws IOException { |
| - |
| assert stats.docFreq > 0; |
| |
| // TODO: wasteful we are counting this (counting # docs |
| @@ -378,17 +374,14 @@ |
| System.out.println("FPW.finishTerm docFreq=" + stats.docFreq); |
| } |
| |
| - // nocommit silly that skipper must write skip when we no |
| - // postings come after it, but if we don't do this, skip |
| - // reader incorrectly thinks it can read another level 0 |
| - // skip entry here!: |
| - //if (docCount > blockSize && docBufferUpto > 0) { |
| - if (docCount > blockSize) { |
| - final int lastDocCount = blockSize*(docCount/blockSize); |
| + // If a vInt block follows, we won't skip into it, |
| + // but we should still be able to skip to its start point |
| + final int lastDocCount = blockSize*(docCount/blockSize); |
| + if (docCount > blockSize && docCount != lastDocCount) { |
| if (DEBUG) { |
| - System.out.println(" bufferSkip at finishTerm: lastDocID=" + lastBlockDocID + " docCount=" + lastDocCount); |
| + System.out.println(" bufferSkip at finishTerm: lastDocID=" + lastBlockDocIDs[0] + " docCount=" + lastDocCount); |
| } |
| - skipWriter.bufferSkip(lastBlockDocID, lastDocCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockStartOffset, lastBlockPayloadByteUpto); |
| + skipWriter.bufferSkip(lastBlockDocIDs[0], lastDocCount, lastBlockPosFPs[0], lastBlockPayFPs[0], lastBlockPosBufferUptos[0], lastBlockStartOffsets[0], lastBlockPayloadByteUptos[0]); |
| } |
| |
| if (DEBUG) { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (working copy) |
| @@ -23,12 +23,34 @@ |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.codecs.MultiLevelSkipListWriter; |
| |
| -// nocommit do we need more frequent skips at level > 0? |
| -// 128*128 is immense? may need to decouple |
| +// nocommit may need to decouple |
| // baseSkipInterval & theRestSkipInterval? |
| |
| +/** |
| + * Writes skip lists with multiple levels, and supports skipping within a block of ints. |
| + * |
| + * Assume that docFreq = 28, blockSize=12, and skipInterval=3 |
| + * |
| + * | block#0 | | block#1 | |vInts| |
| + * d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) |
| + * ^ ^ ^ ^ ^ ^ ^ ^ (level 0 skip point) |
| + * |
| + * Note that skipWriter will ignore the first document in block#0, since |
| + * it is useless as a skip point. Also, we'll never skip into the vInts |
| + * block, only record skip data at its start point (if it exists). |
| + * |
| + * For each skip point, we will record: |
| + * 1. lastDocID, |
| + * 2. its related file pointers (position, payload), |
| + * 3. related numbers or uptos(position, payload). |
| + * 4. start offset. |
| + * |
| + */ |
| final class BlockSkipWriter extends MultiLevelSkipListWriter { |
| private boolean DEBUG = BlockPostingsReader.DEBUG; |
| + private int skipInterval; |
| + private int blockSize; |
| + private int skipsPerBlock; |
| |
| private int[] lastSkipDoc; |
| private long[] lastSkipDocPointer; |
| @@ -52,11 +74,16 @@ |
| private boolean fieldHasOffsets; |
| private boolean fieldHasPayloads; |
| |
| - public BlockSkipWriter(int skipInterval, int maxSkipLevels, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { |
| + public BlockSkipWriter(int maxSkipLevels, int skipInterval, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { |
| super(skipInterval, maxSkipLevels, docCount); |
| this.docOut = docOut; |
| this.posOut = posOut; |
| this.payOut = payOut; |
| + this.skipInterval = skipInterval; |
| + this.blockSize = blockSize; |
| + this.skipsPerBlock = blockSize/skipInterval; |
| + assert this.skipsPerBlock > 0 : this.skipsPerBlock; |
| + assert blockSize % skipInterval == 0 : blockSize+" "+skipInterval; |
| |
| lastSkipDoc = new int[maxSkipLevels]; |
| lastSkipDocPointer = new long[maxSkipLevels]; |
| @@ -108,6 +135,23 @@ |
| this.curStartOffset = startOffset; |
| bufferSkip(numDocs); |
| } |
| + |
| + /** |
| + * Sets the values for skip data in current block |
| + */ |
| + public void bufferSkip(int[] docs, int numDocs, long[] posFPs, long[] payFPs, int[] posBufferUptos, int[] startOffsets, int[] payloadByteUptos) throws IOException { |
| + this.curDocPointer = docOut.getFilePointer(); |
| + // first doc is useless as skip point |
| + for (int i=(numDocs==0 ? 1:0); i<skipsPerBlock; i++) { |
| + this.curPosPointer = posFPs[i]; |
| + this.curPayPointer = payFPs[i]; |
| + this.curPosBufferUpto = posBufferUptos[i]; |
| + this.curPayloadByteUpto = payloadByteUptos[i]; |
| + this.curStartOffset = startOffsets[i]; |
| + this.curDoc = docs[i]; |
| + bufferSkip(numDocs+i*skipInterval); |
| + } |
| + } |
| |
| @Override |
| protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java (working copy) |
| @@ -22,7 +22,7 @@ |
| * Encode all values in normal area with fixed bit width, |
| * which is determined by the max value in this block. |
| */ |
| -public class ForUtil { |
| +public final class ForUtil { |
| protected static final int[] MASK = { 0x00000000, |
| 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, |
| 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, |
| @@ -30,6 +30,8 @@ |
| 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, |
| 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, |
| 0x7fffffff, 0xffffffff}; |
| + protected static final int blockSize = BlockPostingsFormat.DEFAULT_BLOCK_SIZE; |
| + protected static final int totalIterations = blockSize/32; |
| |
| /** Compress given int[] into Integer buffer, with For format |
| * |
| @@ -76,53 +78,73 @@ |
| |
| int numBits = ((header >> 8) & MASK[6]); |
| |
| - decompressCore(intBuffer, data, numBits); |
| + decompressFull(intBuffer, data, numBits); |
| } |
| |
| + /** Decompress the leading part of a compressed block into an int array. |
| + * |
| + * @param intBuffer integer buffer that holds the compressed data |
| + * @param data int array to receive the uncompressed data |
| + * @param header header for current block |
| + * @param iteration this method will decode iteration*32 ints from compressed data. |
| + */ |
| + public static void decompress(IntBuffer intBuffer, int[] data, int header, int iteration) { |
| + // since this buffer is reused at upper level, rewind first |
| + intBuffer.rewind(); |
| + // numBits is stored in the low 6 bits of the header (see getHeader), so use the shared accessor |
| + int numBits = getNumBits(header); |
| + |
| + decompressPart(intBuffer, data, numBits, iteration); |
| + } |
| + |
| + |
| + |
| /** |
| * IntBuffer will not be rewinded in this method, therefore |
| * caller should ensure that the position is set to the first |
| * encoded int before decoding. |
| */ |
| - static void decompressCore(IntBuffer intBuffer, int[] data, int numBits) { |
| - assert numBits<=32; |
| + static void decompressFull(IntBuffer intBuffer, int[] data, int numBits) { |
| + assert numBits<=32 : numBits; |
| assert numBits>=0; |
| + decompressPart(intBuffer, data, numBits, totalIterations); |
| + } |
| |
| - // TODO: PackedIntsDecompress is hardewired to size==128 only |
| + static void decompressPart(IntBuffer intBuffer, int[] data, int numBits, int iteration) { |
| switch(numBits) { |
| - case 0: PackedIntsDecompress.decode0(intBuffer, data); break; |
| - case 1: PackedIntsDecompress.decode1(intBuffer, data); break; |
| - case 2: PackedIntsDecompress.decode2(intBuffer, data); break; |
| - case 3: PackedIntsDecompress.decode3(intBuffer, data); break; |
| - case 4: PackedIntsDecompress.decode4(intBuffer, data); break; |
| - case 5: PackedIntsDecompress.decode5(intBuffer, data); break; |
| - case 6: PackedIntsDecompress.decode6(intBuffer, data); break; |
| - case 7: PackedIntsDecompress.decode7(intBuffer, data); break; |
| - case 8: PackedIntsDecompress.decode8(intBuffer, data); break; |
| - case 9: PackedIntsDecompress.decode9(intBuffer, data); break; |
| - case 10: PackedIntsDecompress.decode10(intBuffer, data); break; |
| - case 11: PackedIntsDecompress.decode11(intBuffer, data); break; |
| - case 12: PackedIntsDecompress.decode12(intBuffer, data); break; |
| - case 13: PackedIntsDecompress.decode13(intBuffer, data); break; |
| - case 14: PackedIntsDecompress.decode14(intBuffer, data); break; |
| - case 15: PackedIntsDecompress.decode15(intBuffer, data); break; |
| - case 16: PackedIntsDecompress.decode16(intBuffer, data); break; |
| - case 17: PackedIntsDecompress.decode17(intBuffer, data); break; |
| - case 18: PackedIntsDecompress.decode18(intBuffer, data); break; |
| - case 19: PackedIntsDecompress.decode19(intBuffer, data); break; |
| - case 20: PackedIntsDecompress.decode20(intBuffer, data); break; |
| - case 21: PackedIntsDecompress.decode21(intBuffer, data); break; |
| - case 22: PackedIntsDecompress.decode22(intBuffer, data); break; |
| - case 23: PackedIntsDecompress.decode23(intBuffer, data); break; |
| - case 24: PackedIntsDecompress.decode24(intBuffer, data); break; |
| - case 25: PackedIntsDecompress.decode25(intBuffer, data); break; |
| - case 26: PackedIntsDecompress.decode26(intBuffer, data); break; |
| - case 27: PackedIntsDecompress.decode27(intBuffer, data); break; |
| - case 28: PackedIntsDecompress.decode28(intBuffer, data); break; |
| - case 29: PackedIntsDecompress.decode29(intBuffer, data); break; |
| - case 30: PackedIntsDecompress.decode30(intBuffer, data); break; |
| - case 31: PackedIntsDecompress.decode31(intBuffer, data); break; |
| - case 32: PackedIntsDecompress.decode32(intBuffer, data); break; |
| + case 0: PackedIntsDecompress.decode0(intBuffer, data, iteration); break; |
| + case 1: PackedIntsDecompress.decode1(intBuffer, data, iteration); break; |
| + case 2: PackedIntsDecompress.decode2(intBuffer, data, iteration); break; |
| + case 3: PackedIntsDecompress.decode3(intBuffer, data, iteration); break; |
| + case 4: PackedIntsDecompress.decode4(intBuffer, data, iteration); break; |
| + case 5: PackedIntsDecompress.decode5(intBuffer, data, iteration); break; |
| + case 6: PackedIntsDecompress.decode6(intBuffer, data, iteration); break; |
| + case 7: PackedIntsDecompress.decode7(intBuffer, data, iteration); break; |
| + case 8: PackedIntsDecompress.decode8(intBuffer, data, iteration); break; |
| + case 9: PackedIntsDecompress.decode9(intBuffer, data, iteration); break; |
| + case 10: PackedIntsDecompress.decode10(intBuffer, data, iteration); break; |
| + case 11: PackedIntsDecompress.decode11(intBuffer, data, iteration); break; |
| + case 12: PackedIntsDecompress.decode12(intBuffer, data, iteration); break; |
| + case 13: PackedIntsDecompress.decode13(intBuffer, data, iteration); break; |
| + case 14: PackedIntsDecompress.decode14(intBuffer, data, iteration); break; |
| + case 15: PackedIntsDecompress.decode15(intBuffer, data, iteration); break; |
| + case 16: PackedIntsDecompress.decode16(intBuffer, data, iteration); break; |
| + case 17: PackedIntsDecompress.decode17(intBuffer, data, iteration); break; |
| + case 18: PackedIntsDecompress.decode18(intBuffer, data, iteration); break; |
| + case 19: PackedIntsDecompress.decode19(intBuffer, data, iteration); break; |
| + case 20: PackedIntsDecompress.decode20(intBuffer, data, iteration); break; |
| + case 21: PackedIntsDecompress.decode21(intBuffer, data, iteration); break; |
| + case 22: PackedIntsDecompress.decode22(intBuffer, data, iteration); break; |
| + case 23: PackedIntsDecompress.decode23(intBuffer, data, iteration); break; |
| + case 24: PackedIntsDecompress.decode24(intBuffer, data, iteration); break; |
| + case 25: PackedIntsDecompress.decode25(intBuffer, data, iteration); break; |
| + case 26: PackedIntsDecompress.decode26(intBuffer, data, iteration); break; |
| + case 27: PackedIntsDecompress.decode27(intBuffer, data, iteration); break; |
| + case 28: PackedIntsDecompress.decode28(intBuffer, data, iteration); break; |
| + case 29: PackedIntsDecompress.decode29(intBuffer, data, iteration); break; |
| + case 30: PackedIntsDecompress.decode30(intBuffer, data, iteration); break; |
| + case 31: PackedIntsDecompress.decode31(intBuffer, data, iteration); break; |
| + case 32: PackedIntsDecompress.decode32(intBuffer, data, iteration); break; |
| } |
| } |
| |
| @@ -177,23 +199,22 @@ |
| /** |
| * Generate the 4 byte header, which contains (from lsb to msb): |
| * |
| - * 8 bits for encoded block int size (excluded header, this limits DEFAULT_BLOCK_SIZE <= 2^8) |
| * 6 bits for num of frame bits (when 0, values in this block are all the same) |
| - * other bits unused |
| + * remaining bits for the encoded block size in ints (excluding the header) |
| * |
| */ |
| static int getHeader(int encodedSize, int numBits) { |
| - return (encodedSize) |
| - | ((numBits) << 8); |
| + return (numBits) |
| + | ((encodedSize) << 6); |
| } |
| |
| /** |
| * Expert: get metadata from header. |
| */ |
| + public static int getNumBits(int header) { |
| + return ((header & MASK[6])); |
| + } |
| public static int getEncodedSize(int header) { |
| - return ((header & MASK[8]))*4; |
| + return ((header >>> 6))*4; |
| } |
| - public static int getNumBits(int header) { |
| - return ((header >> 8) & MASK[6]); |
| - } |
| } |
| Index: lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java (revision 1369254) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java (working copy) |
| @@ -131,6 +131,7 @@ |
| setLastSkipData(level); |
| |
| numSkipped[level] += skipInterval[level]; |
| + //System.out.println("["+docCount+"]"+" skipped "+numSkipped[level]); |
| |
| if (numSkipped[level] > docCount) { |
| // this skip list is exhausted |