lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.codecs.lucene80;

 import java.io.DataInput;
 import java.io.IOException;

 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.RoaringDocIdSet;

 /**
  * Disk-based implementation of a {@link DocIdSetIterator} which can return
  * the index of the current document, i.e. the ordinal of the current document
  * among the list of documents that this iterator can return. This is useful
  * to implement sparse doc values by only having to encode values for documents
  * that actually have a value.
  * <p>Implementation-wise, this {@link DocIdSetIterator} is inspired of
  * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536}
  * documents independently and picks between 3 encodings depending on the
  * density of the range:<ul>
  *   <li>{@code ALL} if the range contains 65536 documents exactly,
  *   <li>{@code DENSE} if the range contains 4096 documents or more; in that
  *       case documents are stored in a bit set,
  *   <li>{@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are
  *       stored in a {@link DataInput#readShort() short}.
  * </ul>
  * <p>Only ranges that contain at least one value are encoded.
  * <p>This implementation uses 6 bytes per document in the worst-case, which happens
  * in the case that all ranges contain exactly one document.
  *
  *
  * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup
  * tables are used: A lookup table for block offset and index, and a rank structure
  * for DENSE block index lookups.
  *
  * The lookup table is an array of {@code int}-pairs, with a pair for each block. It allows for
  * direct jumping to the block, as opposed to iteration from the current position and forward
  * one block at a time.
  *
  * Each int-pair entry consists of 2 logical parts:
  *
  * The first 32 bit int holds the index (number of set bits in the blocks) up to just before the
  * wanted block. The maximum number of set bits is the maximum number of documents, which is < 2^31.
  *
  * The next int holds the offset in bytes into the underlying slice. As there is a maximum of 2^16
  * blocks, it follows that the maximum size of any block must not exceed 2^15 bytes to avoid
  * overflow (2^16 bytes if the int is treated as unsigned). This is currently the case, with the
  * largest block being DENSE and using 2^13 + 36 bytes.
  *
  * The cache overhead is numDocs/1024 bytes.
  *
  * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits).
  * In the case of non-existing blocks, the entry in the lookup table has index equal to the
  * previous entry and offset equal to the next non-empty block.
  *
  * The block lookup table is stored at the end of the total block structure.
  *
  *
  * The rank structure for DENSE blocks is an array of byte-pairs with an entry for each
  * sub-block (default 512 bits) out of the 65536 bits in the outer DENSE block.
  *
  * Each rank-entry states the number of set bits within the block up to the bit before the
  * bit positioned at the start of the sub-block.
  * Note that that the rank entry of the first sub-block is always 0 and that the last entry can
  * at most be 65536-2 = 65634 and thus will always fit into an byte-pair of 16 bits.
  *
  * The rank structure for a given DENSE block is stored at the beginning of the DENSE block.
  * This ensures locality and keeps logistics simple.
  *
  * @lucene.internal
  */
 final class IndexedDISI extends DocIdSetIterator {

   // jump-table time/space trade-offs to consider:
   // The block offsets and the block indexes could be stored in more compressed form with
   // two PackedInts or two MonotonicDirectReaders.
   // The DENSE ranks (default 128 shorts = 256 bytes) could likewise be compressed. But as there is
   // at least 4096 set bits in DENSE blocks, there will be at least one rank with 2^12 bits, so it
   // is doubtful if there is much to gain here.

   private static final int BLOCK_SIZE = 65536;   // The number of docIDs that a single block represents

   private static final int DENSE_BLOCK_LONGS = BLOCK_SIZE/Long.SIZE; // 1024
   public static final byte DEFAULT_DENSE_RANK_POWER = 9; // Every 512 docIDs / 8 longs

   static final int MAX_ARRAY_LENGTH = (1 << 12) - 1;

   private static void flush(
       int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException {
     assert block >= 0 && block < 65536;
     out.writeShort((short) block);
     assert cardinality > 0 && cardinality <= 65536;
     out.writeShort((short) (cardinality - 1));
     if (cardinality > MAX_ARRAY_LENGTH) {
       if (cardinality != 65536) { // all docs are set
         if (denseRankPower != -1) {
           final byte[] rank = createRank(buffer, denseRankPower);
           out.writeBytes(rank, rank.length);
         }
         for (long word : buffer.getBits()) {
           out.writeLong(word);
         }
       }
     } else {
       BitSetIterator it = new BitSetIterator(buffer, cardinality);
       for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
         out.writeShort((short) doc);
       }
     }
   }

   // Creates a DENSE rank-entry (the number of set bits up to a given point) for the buffer.
   // One rank-entry for every {@code 2^denseRankPower} bits, with each rank-entry using 2 bytes.
   // Represented as a byte[] for fast flushing and mirroring of the retrieval representation.
   private static byte[] createRank(FixedBitSet buffer, byte denseRankPower) {
     final int longsPerRank = 1 << (denseRankPower-6);
     final int rankMark = longsPerRank-1;
     final int rankIndexShift = denseRankPower-7; // 6 for the long (2^6) + 1 for 2 bytes/entry
     final byte[] rank = new byte[DENSE_BLOCK_LONGS >> rankIndexShift];
     final long[] bits = buffer.getBits();
     int bitCount = 0;
     for (int word = 0 ; word < DENSE_BLOCK_LONGS ; word++) {
       if ((word & rankMark) == 0) { // Every longsPerRank longs
         rank[word >> rankIndexShift] = (byte)(bitCount>>8);
         rank[(word >> rankIndexShift)+1] = (byte)(bitCount & 0xFF);
       }
       bitCount += Long.bitCount(bits[word]);
     }
     return rank;
   }

   /**
    * Writes the docIDs from it to out, in logical blocks, one for each 65536 docIDs in monotonically increasing
    * gap-less order. DENSE blocks uses {@link #DEFAULT_DENSE_RANK_POWER} of 9 (every 512 docIDs / 8 longs).
    * The caller must keep track of the number of jump-table entries (returned by this method) as well as the
    * denseRankPower (9 for this method) and provide them when constructing an IndexedDISI for reading.
    * @param it  the document IDs.
    * @param out destination for the blocks.
    * @throws IOException if there was an error writing to out.
    * @return the number of jump-table entries following the blocks, -1 for no entries.
    *         This should be stored in meta and used when creating an instance of IndexedDISI.
    */
   static short writeBitSet(DocIdSetIterator it, IndexOutput out) throws IOException {
     return writeBitSet(it, out, DEFAULT_DENSE_RANK_POWER);
   }

   /**
    * Writes the docIDs from it to out, in logical blocks, one for each 65536 docIDs in monotonically
    * increasing gap-less order.
    * The caller must keep track of the number of jump-table entries (returned by this method) as well as the
    * denseRankPower and provide them when constructing an IndexedDISI for reading.
    * @param it  the document IDs.
    * @param out destination for the blocks.
    * @param denseRankPower for {@link Method#DENSE} blocks, a rank will be written every {@code 2^denseRankPower} docIDs.
    *                       Values &lt; 7 (every 128 docIDs) or &gt; 15 (every 32768 docIDs) disables DENSE rank.
    *                       Recommended values are 8-12: Every 256-4096 docIDs or 4-64 longs.
    *                       {@link #DEFAULT_DENSE_RANK_POWER} is 9: Every 512 docIDs.
    *                       This should be stored in meta and used when creating an instance of IndexedDISI.
    * @throws IOException if there was an error writing to out.
    * @return the number of jump-table entries following the blocks, -1 for no entries.
    *         This should be stored in meta and used when creating an instance of IndexedDISI.
    */
   static short writeBitSet(DocIdSetIterator it, IndexOutput out, byte denseRankPower) throws IOException {
     final long origo = out.getFilePointer(); // All jumps are relative to the origo
     if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
       throw new IllegalArgumentException("Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). " +
           "The provided power was " + denseRankPower + " (every " + (int)Math.pow(2, denseRankPower) + " docIDs)");
     }
     int totalCardinality = 0;
     int blockCardinality = 0;
     final FixedBitSet buffer = new FixedBitSet(1<<16);
     int[] jumps = new int[ArrayUtil.oversize(1, Integer.BYTES*2)];
     int prevBlock = -1;
     int jumpBlockIndex = 0;

     for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
       final int block = doc >>> 16;
       if (prevBlock != -1 && block != prevBlock) {
         // Track offset+index from previous block up to current
         jumps = addJumps(jumps, out.getFilePointer()-origo, totalCardinality, jumpBlockIndex, prevBlock+1);
         jumpBlockIndex = prevBlock+1;
         // Flush block
         flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
         // Reset for next block
         buffer.clear(0, buffer.length());
         totalCardinality += blockCardinality;
         blockCardinality = 0;
       }
       buffer.set(doc & 0xFFFF);
       blockCardinality++;
       prevBlock = block;
     }
     if (blockCardinality > 0) {
       jumps = addJumps(jumps, out.getFilePointer()-origo, totalCardinality, jumpBlockIndex, prevBlock+1);
       totalCardinality += blockCardinality;
       flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
       buffer.clear(0, buffer.length());
       prevBlock++;
     }
     final int lastBlock = prevBlock == -1 ? 0 : prevBlock; // There will always be at least 1 block (NO_MORE_DOCS)
     // Last entry is a SPARSE with blockIndex == 32767 and the single entry 65535, which becomes the docID NO_MORE_DOCS
     // To avoid creating 65K jump-table entries, only a single entry is created pointing to the offset of the
     // NO_MORE_DOCS block, with the jumpBlockIndex set to the logical EMPTY block after all real blocks.
     jumps = addJumps(jumps, out.getFilePointer()-origo, totalCardinality, lastBlock, lastBlock+1);
     buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
     flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, denseRankPower, out);
     // offset+index jump-table stored at the end
     return flushBlockJumps(jumps, lastBlock+1, out, origo);
   }

   // Adds entries to the offset & index jump-table for blocks
   private static int[] addJumps(int[] jumps, long offset, int index, int startBlock, int endBlock) {
     assert offset < Integer.MAX_VALUE : "Logically the offset should not exceed 2^30 but was >= Integer.MAX_VALUE";
     jumps = ArrayUtil.grow(jumps, (endBlock+1)*2);
     for (int b = startBlock; b < endBlock; b++) {
       jumps[b*2] = index;
       jumps[b*2+1] = (int) offset;
     }
     return jumps;
   }

   // Flushes the offset & index jump-table for blocks. This should be the last data written to out
   // This method returns the blockCount for the blocks reachable for the jump_table or -1 for no jump-table
   private static short flushBlockJumps(int[] jumps, int blockCount, IndexOutput out, long origo) throws IOException {
     if (blockCount == 2) { // Jumps with a single real entry + NO_MORE_DOCS is just wasted space so we ignore that
       blockCount = 0;
     }
     for (int i = 0 ; i < blockCount ; i++) {
       out.writeInt(jumps[i*2]); // index
       out.writeInt(jumps[i*2+1]); // offset
     }
     // As there are at most 32k blocks, the count is a short
     // The jumpTableOffset will be at lastPos - (blockCount * Long.BYTES)
     return (short)blockCount;
   }

   // Members are pkg-private to avoid synthetic accessors when accessed from the `Method` enum

   /** The slice that stores the {@link DocIdSetIterator}. */
   final IndexInput slice;
   final int jumpTableEntryCount;
   final byte denseRankPower;
   final RandomAccessInput jumpTable; // Skip blocks of 64K bits
   final byte[] denseRankTable;
   final long cost;

   /**
    * This constructor always creates a new blockSlice and a new jumpTable from in, to ensure that operations are
    * independent from the caller.
    * See {@link #IndexedDISI(IndexInput, RandomAccessInput, int, byte, long)} for re-use of blockSlice and jumpTable.
    * @param in backing data.
    * @param offset starting offset for blocks in the backing data.
    * @param length the number of bytes holding blocks and jump-table in the backing data.
    * @param jumpTableEntryCount the number of blocks covered by the jump-table.
    *                            This must match the number returned by {@link #writeBitSet(DocIdSetIterator, IndexOutput, byte)}.
    * @param denseRankPower the number of docIDs covered by each rank entry in DENSE blocks, expressed as {@code 2^denseRankPower}.
    *                       This must match the power given in {@link #writeBitSet(DocIdSetIterator, IndexOutput, byte)}
    * @param cost normally the number of logical docIDs.
    */
   IndexedDISI(IndexInput in, long offset, long length, int jumpTableEntryCount, byte denseRankPower, long cost) throws IOException {
     this(createBlockSlice(in,"docs", offset, length, jumpTableEntryCount),
         createJumpTable(in, offset, length, jumpTableEntryCount),
         jumpTableEntryCount, denseRankPower, cost);
   }

   /**
    * This constructor allows to pass the slice and jumpTable directly in case it helps reuse.
    * see eg. Lucene80 norms producer's merge instance.
    * @param blockSlice data blocks, normally created by {@link #createBlockSlice}.
    * @param jumpTable table holding jump-data for block-skips, normally created by {@link #createJumpTable}.
    * @param jumpTableEntryCount the number of blocks covered by the jump-table.
    *                            This must match the number returned by {@link #writeBitSet(DocIdSetIterator, IndexOutput, byte)}.
    * @param denseRankPower the number of docIDs covered by each rank entry in DENSE blocks, expressed as {@code 2^denseRankPower}.
    *                       This must match the power given in {@link #writeBitSet(DocIdSetIterator, IndexOutput, byte)}
    * @param cost normally the number of logical docIDs.
    */
   IndexedDISI(IndexInput blockSlice, RandomAccessInput jumpTable, int jumpTableEntryCount, byte denseRankPower, long cost) throws IOException {
     if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
       throw new IllegalArgumentException("Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). " +
           "The provided power was " + denseRankPower + " (every " + (int)Math.pow(2, denseRankPower) + " docIDs). ");
     }

     this.slice = blockSlice;
     this.jumpTable = jumpTable;
     this.jumpTableEntryCount = jumpTableEntryCount;
     this.denseRankPower = denseRankPower;
     final int rankIndexShift = denseRankPower-7;
     this.denseRankTable = denseRankPower == -1 ? null : new byte[DENSE_BLOCK_LONGS >> rankIndexShift];
     this.cost = cost;
   }

   /**
    * Helper method for using {@link #IndexedDISI(IndexInput, RandomAccessInput, int, byte, long)}.
    * Creates a disiSlice for the IndexedDISI data blocks, without the jump-table.
    * @param slice backing data, holding both blocks and jump-table.
    * @param sliceDescription human readable slice designation.
    * @param offset relative to the backing data.
    * @param length full length of the IndexedDISI, including blocks and jump-table data.
    * @param jumpTableEntryCount the number of blocks covered by the jump-table.
    * @return a jumpTable containing the block jump-data or null if no such table exists.
    * @throws IOException if a RandomAccessInput could not be created from slice.
    */
   public static IndexInput createBlockSlice(
       IndexInput slice, String sliceDescription, long offset, long length, int jumpTableEntryCount) throws IOException {
     long jumpTableBytes = jumpTableEntryCount < 0 ? 0 : jumpTableEntryCount*Integer.BYTES*2;
     return slice.slice(sliceDescription, offset, length - jumpTableBytes);
   }

   /**
    * Helper method for using {@link #IndexedDISI(IndexInput, RandomAccessInput, int, byte, long)}.
    * Creates a RandomAccessInput covering only the jump-table data or null.
    * @param slice backing data, holding both blocks and jump-table.
    * @param offset relative to the backing data.
    * @param length full length of the IndexedDISI, including blocks and jump-table data.
    * @param jumpTableEntryCount the number of blocks covered by the jump-table.
    * @return a jumpTable containing the block jump-data or null if no such table exists.
    * @throws IOException if a RandomAccessInput could not be created from slice.
    */
   public static RandomAccessInput createJumpTable(
       IndexInput slice, long offset, long length, int jumpTableEntryCount) throws IOException {
     if (jumpTableEntryCount <= 0) {
       return null;
     } else {
       int jumpTableBytes = jumpTableEntryCount*Integer.BYTES*2;
       return slice.randomAccessSlice(offset + length - jumpTableBytes, jumpTableBytes);
     }
   }

   int block = -1;
   long blockEnd;
   long denseBitmapOffset = -1; // Only used for DENSE blocks
   int nextBlockIndex = -1;
   Method method;

   int doc = -1;
   int index = -1;

   // SPARSE variables
   boolean exists;

   // DENSE variables
   long word;
   int wordIndex = -1;
   // number of one bits encountered so far, including those of `word`
   int numberOfOnes;
   // Used with rank for jumps inside of DENSE as they are absolute instead of relative
   int denseOrigoIndex;

   // ALL variables
   int gap;

   @Override
   public int docID() {
     return doc;
   }

   @Override
   public int advance(int target) throws IOException {
     final int targetBlock = target & 0xFFFF0000;
     if (block < targetBlock) {
       advanceBlock(targetBlock);
     }
     if (block == targetBlock) {
       if (method.advanceWithinBlock(this, target)) {
         return doc;
       }
       readBlockHeader();
     }
     boolean found = method.advanceWithinBlock(this, block);
     assert found;
     return doc;
   }

   public boolean advanceExact(int target) throws IOException {
     final int targetBlock = target & 0xFFFF0000;
     if (block < targetBlock) {
       advanceBlock(targetBlock);
     }
     boolean found = block == targetBlock && method.advanceExactWithinBlock(this, target);
     this.doc = target;
     return found;
   }

   private void advanceBlock(int targetBlock) throws IOException {
     final int blockIndex = targetBlock >> 16;
     // If the destination block is 2 blocks or more ahead, we use the jump-table.
     if (jumpTable != null && blockIndex >= (block >> 16)+2) {
       // If the jumpTableEntryCount is exceeded, there are no further bits. Last entry is always NO_MORE_DOCS
       final int inRangeBlockIndex = blockIndex < jumpTableEntryCount ? blockIndex : jumpTableEntryCount-1;
       final int index = jumpTable.readInt(inRangeBlockIndex*Integer.BYTES*2);
       final int offset = jumpTable.readInt(inRangeBlockIndex*Integer.BYTES*2+Integer.BYTES);
       this.nextBlockIndex = index-1; // -1 to compensate for the always-added 1 in readBlockHeader
       slice.seek(offset);
       readBlockHeader();
       return;
     }

     // Fallback to iteration of blocks
     do {
       slice.seek(blockEnd);
       readBlockHeader();
     } while (block < targetBlock);
   }

   private void readBlockHeader() throws IOException {
     block = Short.toUnsignedInt(slice.readShort()) << 16;
     assert block >= 0;
     final int numValues = 1 + Short.toUnsignedInt(slice.readShort());
     index = nextBlockIndex;
     nextBlockIndex = index + numValues;
     if (numValues <= MAX_ARRAY_LENGTH) {
       method = Method.SPARSE;
       blockEnd = slice.getFilePointer() + (numValues << 1);
     } else if (numValues == 65536) {
       method = Method.ALL;
       blockEnd = slice.getFilePointer();
       gap = block - index - 1;
     } else {
       method = Method.DENSE;
       denseBitmapOffset = slice.getFilePointer() + (denseRankTable == null ?  0 : denseRankTable.length);
       blockEnd = denseBitmapOffset + (1 << 13);
       // Performance consideration: All rank (default 128 * 16 bits) are loaded up front. This should be fast with the
       // reusable byte[] buffer, but it is still wasted if the DENSE block is iterated in small steps.
       // If this results in too great a performance regression, a heuristic strategy might work where the rank data
       // are loaded on first in-block advance, if said advance is > X docIDs. The hope being that a small first
       // advance means that subsequent advances will be small too.
       // Another alternative is to maintain an extra slice for DENSE rank, but IndexedDISI is already slice-heavy.
       if (denseRankPower != -1) {
         slice.readBytes(denseRankTable, 0, denseRankTable.length);
       }
       wordIndex = -1;
       numberOfOnes = index + 1;
       denseOrigoIndex = numberOfOnes;
     }
   }

   @Override
   public int nextDoc() throws IOException {
     return advance(doc + 1);
   }

   public int index() {
     return index;
   }

   @Override
   public long cost() {
     return cost;
   }

   enum Method {
     SPARSE {
       @Override
       boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException {
         final int targetInBlock = target & 0xFFFF;
         // TODO: binary search
         for (; disi.index < disi.nextBlockIndex;) {
           int doc = Short.toUnsignedInt(disi.slice.readShort());
           disi.index++;
           if (doc >= targetInBlock) {
             disi.doc = disi.block | doc;
             disi.exists = true;
             return true;
           }
         }
         return false;
       }
       @Override
       boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException {
         final int targetInBlock = target & 0xFFFF;
         // TODO: binary search
         if (target == disi.doc) {
           return disi.exists;
         }
         for (; disi.index < disi.nextBlockIndex;) {
           int doc = Short.toUnsignedInt(disi.slice.readShort());
           disi.index++;
           if (doc >= targetInBlock) {
             if (doc != targetInBlock) {
               disi.index--;
               disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES);
               break;
             }
             disi.exists = true;
             return true;
           }
         }
         disi.exists = false;
         return false;
       }
     },
     DENSE {
       @Override
       boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException {
         final int targetInBlock = target & 0xFFFF;
         final int targetWordIndex = targetInBlock >>> 6;

         // If possible, skip ahead using the rank cache
         // If the distance between the current position and the target is < rank-longs
         // there is no sense in using rank
         if (disi.denseRankPower != -1 && targetWordIndex - disi.wordIndex >= (1 << (disi.denseRankPower-6) )) {
           rankSkip(disi, targetInBlock);
         }

         for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
           disi.word = disi.slice.readLong();
           disi.numberOfOnes += Long.bitCount(disi.word);
         }
         disi.wordIndex = targetWordIndex;

         long leftBits = disi.word >>> target;
         if (leftBits != 0L) {
           disi.doc = target + Long.numberOfTrailingZeros(leftBits);
           disi.index = disi.numberOfOnes - Long.bitCount(leftBits);
           return true;
         }

         // There were no set bits at the wanted position. Move forward until one is reached
         while (++disi.wordIndex < 1024) {
           // This could use the rank cache to skip empty spaces >= 512 bits, but it seems unrealistic
           // that such blocks would be DENSE
           disi.word = disi.slice.readLong();
           if (disi.word != 0) {
             disi.index = disi.numberOfOnes;
             disi.numberOfOnes += Long.bitCount(disi.word);
             disi.doc = disi.block | (disi.wordIndex << 6) | Long.numberOfTrailingZeros(disi.word);
             return true;
           }
         }
         // No set bits in the block at or after the wanted position.
         return false;
       }

       @Override
       boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException {
         final int targetInBlock = target & 0xFFFF;
         final int targetWordIndex = targetInBlock >>> 6;

         // If possible, skip ahead using the rank cache
         // If the distance between the current position and the target is < rank-longs
         // there is no sense in using rank
         if (disi.denseRankPower != -1 && targetWordIndex - disi.wordIndex >= (1 << (disi.denseRankPower-6) )) {
           rankSkip(disi, targetInBlock);
         }

         for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
           disi.word = disi.slice.readLong();
           disi.numberOfOnes += Long.bitCount(disi.word);
         }
         disi.wordIndex = targetWordIndex;

         long leftBits = disi.word >>> target;
         disi.index = disi.numberOfOnes - Long.bitCount(leftBits);
         return (leftBits & 1L) != 0;
       }


     },
     ALL {
       @Override
       boolean advanceWithinBlock(IndexedDISI disi, int target) {
         disi.doc = target;
         disi.index = target - disi.gap;
         return true;
       }
       @Override
       boolean advanceExactWithinBlock(IndexedDISI disi, int target) {
         disi.index = target - disi.gap;
         return true;
       }
     };

     /** Advance to the first doc from the block that is equal to or greater than {@code target}.
      *  Return true if there is such a doc and false otherwise. */
     abstract boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException;

     /** Advance the iterator exactly to the position corresponding to the given {@code target}
      * and return whether this document exists. */
     abstract boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException;
   }

   /**
    * If the distance between the current position and the target is > 8 words, the rank cache will
    * be used to guarantee a worst-case of 1 rank-lookup and 7 word-read-and-count-bits operations.
    * Note: This does not guarantee a skip up to target, only up to nearest rank boundary. It is the
    * responsibility of the caller to iterate further to reach target.
    * @param disi standard DISI.
    * @param targetInBlock lower 16 bits of the target
    * @throws IOException if a DISI seek failed.
    */
   private static void rankSkip(IndexedDISI disi, int targetInBlock) throws IOException {
     assert disi.denseRankPower >= 0 : disi.denseRankPower;
     // Resolve the rank as close to targetInBlock as possible (maximum distance is 8 longs)
     // Note: rankOrigoOffset is tracked on block open, so it is absolute (e.g. don't add origo)
     final int rankIndex = targetInBlock >> disi.denseRankPower; // Default is 9 (8 longs: 2^3 * 2^6 = 512 docIDs)

     final int rank =
         (disi.denseRankTable[rankIndex<<1] & 0xFF) << 8 |
         (disi.denseRankTable[(rankIndex<<1)+1] & 0xFF);

     // Position the counting logic just after the rank point
     final int rankAlignedWordIndex = rankIndex << disi.denseRankPower >> 6;
     disi.slice.seek(disi.denseBitmapOffset + rankAlignedWordIndex*Long.BYTES);
     long rankWord = disi.slice.readLong();
     int denseNOO = rank + Long.bitCount(rankWord);

     disi.wordIndex = rankAlignedWordIndex;
     disi.word = rankWord;
     disi.numberOfOnes = disi.denseOrigoIndex + denseNOO;
   }
 }