/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MathUtil;
/**
* Handles intersection of a multi-dimensional shape in byte[] space with a block KD-tree
* previously written with {@link BKDWriter}.
*
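* <p>A minimal usage sketch (illustrative only: {@code metaIn}, {@code indexIn} and {@code dataIn}
* stand for {@link IndexInput}s already positioned as described on the constructor, and the
* anonymous visitor is a placeholder for a real query shape):
*
* <pre>{@code
* BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
* reader.intersect(new PointValues.IntersectVisitor() {
*   public void visit(int docID) {
*     // the current cell is fully inside the query: collect docID unconditionally
*   }
*
*   public void visit(int docID, byte[] packedValue) {
*     // the current cell crosses the query: test packedValue, then collect docID if it matches
*   }
*
*   public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
*     // relate the cell's bounding box to the query shape
*     return PointValues.Relation.CELL_CROSSES_QUERY;
*   }
* });
* }</pre>
*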
* @lucene.experimental
*/
public final class BKDReader extends PointValues {
// Number of leaf nodes; node IDs >= leafNodeOffset denote leaves of the full binary tree:
final int leafNodeOffset;
final BKDConfig config;
final int numLeaves;
final IndexInput in;
final byte[] minPackedValue;
final byte[] maxPackedValue;
final long pointCount;
final int docCount;
final int version;
final long minLeafBlockFP;
final IndexInput packedIndex;
/**
* Caller must pre-seek the provided {@link IndexInput} to the index location that {@link
* BKDWriter#finish} returned. BKD tree is always stored off-heap.
*/
public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException {
version =
CodecUtil.checkHeader(
metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
final int numDims = metaIn.readVInt();
final int numIndexDims;
if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
numIndexDims = metaIn.readVInt();
} else {
numIndexDims = numDims;
}
final int maxPointsInLeafNode = metaIn.readVInt();
final int bytesPerDim = metaIn.readVInt();
config = new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
// Read index:
numLeaves = metaIn.readVInt();
assert numLeaves > 0;
leafNodeOffset = numLeaves;
minPackedValue = new byte[config.packedIndexBytesLength];
maxPackedValue = new byte[config.packedIndexBytesLength];
metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
for (int dim = 0; dim < config.numIndexDims; dim++) {
if (Arrays.compareUnsigned(
minPackedValue,
dim * config.bytesPerDim,
dim * config.bytesPerDim + config.bytesPerDim,
maxPackedValue,
dim * config.bytesPerDim,
dim * config.bytesPerDim + config.bytesPerDim)
> 0) {
throw new CorruptIndexException(
"minPackedValue "
+ new BytesRef(minPackedValue)
+ " is > maxPackedValue "
+ new BytesRef(maxPackedValue)
+ " for dim="
+ dim,
metaIn);
}
}
pointCount = metaIn.readVLong();
docCount = metaIn.readVInt();
int numIndexBytes = metaIn.readVInt();
long indexStartPointer;
if (version >= BKDWriter.VERSION_META_FILE) {
minLeafBlockFP = metaIn.readLong();
indexStartPointer = metaIn.readLong();
} else {
indexStartPointer = indexIn.getFilePointer();
minLeafBlockFP = indexIn.readVLong();
indexIn.seek(indexStartPointer);
}
this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
this.in = dataIn;
}
long getMinLeafBlockFP() {
return minLeafBlockFP;
}
/**
* Used to walk the off-heap index. The format takes advantage of the limited access pattern to
* the BKD tree at search time, i.e. starting at the root node and recursing downwards one child
* at a time.
*
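* <p>A sketch of the intended root-to-leaf traversal (illustrative only; {@code walk} is a
* hypothetical helper and {@code tree} an {@code IndexTree} obtained from this reader):
*
* <pre>{@code
* void walk(BKDReader.IndexTree tree) {
*   if (tree.isLeafNode()) {
*     if (tree.nodeExists()) {
*       long fp = tree.getLeafBlockFP(); // file pointer of this leaf's block in the data file
*     }
*   } else {
*     tree.pushLeft();
*     walk(tree);
*     tree.pop();
*     tree.pushRight();
*     walk(tree);
*     tree.pop();
*   }
* }
* }</pre>
*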
* @lucene.internal
*/
public class IndexTree implements Cloneable {
private int nodeID;
// level is 1-based so that we can do level-1 w/o checking each time:
private int level;
private int splitDim;
private final byte[][] splitPackedValueStack;
// used to read the packed tree off-heap
private final IndexInput in;
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
private final long[] leafBlockFPStack;
// holds the address, in the off-heap index, of the right-node of each level:
private final int[] rightNodePositions;
// holds the splitDim for each level:
private final int[] splitDims;
// true if the per-dim delta we read for the node at this level is a negative offset vs. the
// last split on this dim; this is a packed 2D array, i.e. to access array[level][dim] you read
// from negativeDeltas[level*numDims+dim]. This will be true if the last time we split on this
// dimension, we next pushed to the left sub-tree:
private final boolean[] negativeDeltas;
// holds the packed per-level split values; the intersect method uses this to save the cell
// min/max as it recurses:
private final byte[][] splitValuesStack;
// scratch value to return from getPackedValue:
private final BytesRef scratch;
IndexTree() {
this(packedIndex.clone(), 1, 1);
// read root node
readNodeData(false);
}
private IndexTree(IndexInput in, int nodeID, int level) {
int treeDepth = getTreeDepth();
splitPackedValueStack = new byte[treeDepth + 1][];
this.nodeID = nodeID;
this.level = level;
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
leafBlockFPStack = new long[treeDepth + 1];
rightNodePositions = new int[treeDepth + 1];
splitValuesStack = new byte[treeDepth + 1][];
splitDims = new int[treeDepth + 1];
negativeDeltas = new boolean[config.numIndexDims * (treeDepth + 1)];
this.in = in;
splitValuesStack[0] = new byte[config.packedIndexBytesLength];
scratch = new BytesRef();
scratch.length = config.bytesPerDim;
}
public void pushLeft() {
nodeID *= 2;
level++;
readNodeData(true);
}
/** Clone, but you are not allowed to pop up past the point where the clone happened. */
@Override
public IndexTree clone() {
IndexTree index = new IndexTree(in.clone(), nodeID, level);
// copy node data
index.splitDim = splitDim;
index.leafBlockFPStack[level] = leafBlockFPStack[level];
index.rightNodePositions[level] = rightNodePositions[level];
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
System.arraycopy(
negativeDeltas,
level * config.numIndexDims,
index.negativeDeltas,
level * config.numIndexDims,
config.numIndexDims);
index.splitDims[level] = splitDims[level];
return index;
}
public void pushRight() {
final int nodePosition = rightNodePositions[level];
assert nodePosition >= in.getFilePointer()
: "nodePosition = " + nodePosition + " < currentPosition=" + in.getFilePointer();
nodeID = nodeID * 2 + 1;
level++;
try {
in.seek(nodePosition);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
readNodeData(false);
}
public void pop() {
nodeID /= 2;
level--;
splitDim = splitDims[level];
// System.out.println(" pop nodeID=" + nodeID);
}
public boolean isLeafNode() {
return nodeID >= leafNodeOffset;
}
public boolean nodeExists() {
return nodeID - leafNodeOffset < leafNodeOffset;
}
public int getNodeID() {
return nodeID;
}
public byte[] getSplitPackedValue() {
assert isLeafNode() == false;
assert splitPackedValueStack[level] != null : "level=" + level;
return splitPackedValueStack[level];
}
/** Only valid after pushLeft or pushRight, not pop! */
public int getSplitDim() {
assert isLeafNode() == false;
return splitDim;
}
/** Only valid after pushLeft or pushRight, not pop! */
public BytesRef getSplitDimValue() {
assert isLeafNode() == false;
scratch.bytes = splitValuesStack[level];
scratch.offset = splitDim * config.bytesPerDim;
return scratch;
}
/** Only valid after pushLeft or pushRight, not pop! */
public long getLeafBlockFP() {
assert isLeafNode() : "nodeID=" + nodeID + " is not a leaf";
return leafBlockFPStack[level];
}
/** Return the number of leaves below the current node. */
public int getNumLeaves() {
int leftMostLeafNode = nodeID;
while (leftMostLeafNode < leafNodeOffset) {
leftMostLeafNode = leftMostLeafNode * 2;
}
int rightMostLeafNode = nodeID;
while (rightMostLeafNode < leafNodeOffset) {
rightMostLeafNode = rightMostLeafNode * 2 + 1;
}
final int numLeaves;
if (rightMostLeafNode >= leftMostLeafNode) {
// both are on the same level
numLeaves = rightMostLeafNode - leftMostLeafNode + 1;
} else {
// left is one level deeper than right
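// e.g. leafNodeOffset=5, nodeID=2: leftMostLeafNode=8, rightMostLeafNode=5, so 5 - 8 + 1 + 5 = 3 leaves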
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
}
assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + getNumLeavesSlow(nodeID);
return numLeaves;
}
// for assertions
private int getNumLeavesSlow(int node) {
if (node >= 2 * leafNodeOffset) {
return 0;
} else if (node >= leafNodeOffset) {
return 1;
} else {
final int leftCount = getNumLeavesSlow(node * 2);
final int rightCount = getNumLeavesSlow(node * 2 + 1);
return leftCount + rightCount;
}
}
private void readNodeData(boolean isLeft) {
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(
negativeDeltas,
(level - 1) * config.numIndexDims,
negativeDeltas,
level * config.numIndexDims,
config.numIndexDims);
assert splitDim != -1;
negativeDeltas[level * config.numIndexDims + splitDim] = isLeft;
try {
leafBlockFPStack[level] = leafBlockFPStack[level - 1];
// read leaf block FP delta
if (isLeft == false) {
leafBlockFPStack[level] += in.readVLong();
}
if (isLeafNode()) {
splitDim = -1;
} else {
// read split dim, prefix, firstDiffByteDelta encoded as int:
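// i.e. code == ((firstDiffByteDelta * (1 + bytesPerDim)) + prefix) * numIndexDims + splitDim,
// which the remainders/divisions below undo in reverse order: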
int code = in.readVInt();
splitDim = code % config.numIndexDims;
splitDims[level] = splitDim;
code /= config.numIndexDims;
int prefix = code % (1 + config.bytesPerDim);
int suffix = config.bytesPerDim - prefix;
if (splitValuesStack[level] == null) {
splitValuesStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(
splitValuesStack[level - 1],
0,
splitValuesStack[level],
0,
config.packedIndexBytesLength);
if (suffix > 0) {
int firstDiffByteDelta = code / (1 + config.bytesPerDim);
if (negativeDeltas[level * config.numIndexDims + splitDim]) {
firstDiffByteDelta = -firstDiffByteDelta;
}
int oldByte = splitValuesStack[level][splitDim * config.bytesPerDim + prefix] & 0xFF;
splitValuesStack[level][splitDim * config.bytesPerDim + prefix] =
(byte) (oldByte + firstDiffByteDelta);
in.readBytes(
splitValuesStack[level], splitDim * config.bytesPerDim + prefix + 1, suffix - 1);
} else {
// our split value is == last split value in this dim, which can happen when there are
// many duplicate values
}
int leftNumBytes;
if (nodeID * 2 < leafNodeOffset) {
leftNumBytes = in.readVInt();
} else {
leftNumBytes = 0;
}
rightNodePositions[level] = Math.toIntExact(in.getFilePointer()) + leftNumBytes;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}
private int getTreeDepth() {
// First +1 because all the non-leaf nodes make another power
// of 2; e.g. to have a fully balanced tree with 4 leaves you
// need a depth=3 tree:
// Second +1 because MathUtil.log computes floor of the logarithm; e.g.
// with 5 leaves you need a depth=4 tree:
return MathUtil.log(numLeaves, 2) + 2;
}
/** Used to track all state for a single call to {@link #intersect}. */
public static final class IntersectState {
final IndexInput in;
final BKDReaderDocIDSetIterator scratchIterator;
final byte[] scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue;
final int[] commonPrefixLengths;
final IntersectVisitor visitor;
public final IndexTree index;
public IntersectState(
IndexInput in, BKDConfig config, IntersectVisitor visitor, IndexTree indexVisitor) {
this.in = in;
this.visitor = visitor;
this.commonPrefixLengths = new int[config.numDims];
this.scratchIterator = new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode);
this.scratchDataPackedValue = new byte[config.packedBytesLength];
this.scratchMinIndexPackedValue = new byte[config.packedIndexBytesLength];
this.scratchMaxIndexPackedValue = new byte[config.packedIndexBytesLength];
this.index = indexVisitor;
}
}
@Override
public void intersect(IntersectVisitor visitor) throws IOException {
intersect(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatePointCount(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
private void addAll(IntersectState state, boolean grown) throws IOException {
// System.out.println("R: addAll nodeID=" + nodeID);
if (grown == false) {
final long maxPointCount = (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
if (maxPointCount
<= Integer.MAX_VALUE) { // could be >MAX_VALUE if there are more than 2B points in total
state.visitor.grow((int) maxPointCount);
grown = true;
}
}
if (state.index.isLeafNode()) {
assert grown;
// System.out.println("ADDALL");
if (state.index.nodeExists()) {
visitDocIDs(state.in, state.index.getLeafBlockFP(), state.visitor);
}
// TODO: we can assert that the first value here in fact matches what the index claimed?
} else {
state.index.pushLeft();
addAll(state, grown);
state.index.pop();
state.index.pushRight();
addAll(state, grown);
state.index.pop();
}
}
/** Create a new {@link IntersectState} */
public IntersectState getIntersectState(IntersectVisitor visitor) {
IndexTree index = new IndexTree();
return new IntersectState(in.clone(), config, visitor, index);
}
/** Visits all docIDs and packed values in a single leaf block */
public void visitLeafBlockValues(IndexTree index, IntersectState state) throws IOException {
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, index.getLeafBlockFP(), state.scratchIterator);
// Again, this time reading values and checking with the visitor
visitDocValues(
state.commonPrefixLengths,
state.scratchDataPackedValue,
state.scratchMinIndexPackedValue,
state.scratchMaxIndexPackedValue,
state.in,
state.scratchIterator,
count,
state.visitor);
}
private void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor)
throws IOException {
// Leaf node
in.seek(blockFP);
// How many points are stored in this leaf cell:
int count = in.readVInt();
// No need to call grow(), it has been called up-front
DocIdsWriter.readInts(in, count, visitor);
}
int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator iterator)
throws IOException {
in.seek(blockFP);
// How many points are stored in this leaf cell:
int count = in.readVInt();
DocIdsWriter.readInts(in, count, iterator.docIDs);
return count;
}
void visitDocValues(
int[] commonPrefixLengths,
byte[] scratchDataPackedValue,
byte[] scratchMinIndexPackedValue,
byte[] scratchMaxIndexPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
if (version >= BKDWriter.VERSION_LOW_CARDINALITY_LEAVES) {
visitDocValuesWithCardinality(
commonPrefixLengths,
scratchDataPackedValue,
scratchMinIndexPackedValue,
scratchMaxIndexPackedValue,
in,
scratchIterator,
count,
visitor);
} else {
visitDocValuesNoCardinality(
commonPrefixLengths,
scratchDataPackedValue,
scratchMinIndexPackedValue,
scratchMaxIndexPackedValue,
in,
scratchIterator,
count,
visitor);
}
}
void visitDocValuesNoCardinality(
int[] commonPrefixLengths,
byte[] scratchDataPackedValue,
byte[] scratchMinIndexPackedValue,
byte[] scratchMaxIndexPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
if (config.numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us a range of values for each dimension, but the actual range of values
// might be much narrower than what the index told us, so we double-check the relation
// here, which is cheap yet might help figure out that the block either entirely matches
// or does not match at all. This is especially likely when multiple dimensions are
// correlated, i.e. splitting on one dimension also significantly changes the range of
// values in another dimension.
Relation r = visitor.compare(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
}
visitor.grow(count);
if (r == Relation.CELL_INSIDE_QUERY) {
for (int i = 0; i < count; ++i) {
visitor.visit(scratchIterator.docIDs[i]);
}
return;
}
} else {
visitor.grow(count);
}
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
visitCompressedDocValues(
commonPrefixLengths,
scratchDataPackedValue,
in,
scratchIterator,
count,
visitor,
compressedDim);
}
}
void visitDocValuesWithCardinality(
int[] commonPrefixLengths,
byte[] scratchDataPackedValue,
byte[] scratchMinIndexPackedValue,
byte[] scratchMaxIndexPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
// all values are the same
visitor.grow(count);
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
if (config.numIndexDims != 1) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(
scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us a range of values for each dimension, but the actual range of values
// might be much narrower than what the index told us, so we double-check the relation
// here, which is cheap yet might help figure out that the block either entirely matches
// or does not match at all. This is especially likely when multiple dimensions are
// correlated, i.e. splitting on one dimension also significantly changes the range of
// values in another dimension.
Relation r = visitor.compare(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
}
visitor.grow(count);
if (r == Relation.CELL_INSIDE_QUERY) {
for (int i = 0; i < count; ++i) {
visitor.visit(scratchIterator.docIDs[i]);
}
return;
}
} else {
visitor.grow(count);
}
if (compressedDim == -2) {
// low cardinality values
visitSparseRawDocValues(
commonPrefixLengths, scratchDataPackedValue, in, scratchIterator, count, visitor);
} else {
// high cardinality
visitCompressedDocValues(
commonPrefixLengths,
scratchDataPackedValue,
in,
scratchIterator,
count,
visitor,
compressedDim);
}
}
}
private void readMinMax(
int[] commonPrefixLengths, byte[] minPackedValue, byte[] maxPackedValue, IndexInput in)
throws IOException {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(minPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
in.readBytes(maxPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
}
// For each run: read its cardinality (run length) and its packed value, then visit that many
// consecutive docIDs
private void visitSparseRawDocValues(
int[] commonPrefixLengths,
byte[] scratchPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
int i;
for (i = 0; i < count; ) {
int length = in.readVInt();
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(
scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
scratchIterator.reset(i, length);
visitor.visit(scratchIterator, scratchPackedValue);
i += length;
}
if (i != count) {
throw new CorruptIndexException(
"Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
// All points in this leaf share a single value, which is fully covered by the common prefix
private void visitUniqueRawDocValues(
byte[] scratchPackedValue,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
scratchIterator.reset(0, count);
visitor.visit(scratchIterator, scratchPackedValue);
}
private void visitCompressedDocValues(
int[] commonPrefixLengths,
byte[] scratchPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor,
int compressedDim)
throws IOException {
// the byte at `compressedByteOffset` is compressed using run-length compression,
// other suffix bytes are stored verbatim
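// on-disk layout per run: [1 byte: value at compressedByteOffset][1 byte: runLen]
// followed by runLen x per-dim suffix bytes, as read in the loop below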
final int compressedByteOffset =
compressedDim * config.bytesPerDim + commonPrefixLengths[compressedDim];
commonPrefixLengths[compressedDim]++;
int i;
for (i = 0; i < count; ) {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(
scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
visitor.visit(scratchIterator.docIDs[i + j], scratchPackedValue);
}
i += runLen;
}
if (i != count) {
throw new CorruptIndexException(
"Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
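// Reads which dimension is run-length compressed in this leaf: -1 means all points share the
// same value (fully given by the common prefixes), -2 means the low-cardinality encoding is
// used (only valid from VERSION_LOW_CARDINALITY_LEAVES on), and values >= 0 name the
// compressed dimension.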
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
if (compressedDim < -2
|| compressedDim >= config.numDims
|| (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
throw new CorruptIndexException("Got compressedDim=" + compressedDim, in);
}
return compressedDim;
}
private void readCommonPrefixes(
int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = in.readVInt();
commonPrefixLengths[dim] = prefix;
if (prefix > 0) {
in.readBytes(scratchPackedValue, dim * config.bytesPerDim, prefix);
}
// System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
}
}
private void intersect(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked)
throws IOException {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
} else if (r == Relation.CELL_INSIDE_QUERY) {
// This cell is fully inside of the query shape: recursively add all points in this cell
// without filtering
addAll(state, false);
// The cell crosses the shape boundary, or the cell fully contains the query, so we fall
// through and do full filtering:
} else if (state.index.isLeafNode()) {
// TODO: we can assert that the first value here in fact matches what the index claimed?
// In the unbalanced case it's possible the left most node only has one child:
if (state.index.nodeExists()) {
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, state.index.getLeafBlockFP(), state.scratchIterator);
// Again, this time reading values and checking with the visitor
visitDocValues(
state.commonPrefixLengths,
state.scratchDataPackedValue,
state.scratchMinIndexPackedValue,
state.scratchMaxIndexPackedValue,
state.in,
state.scratchIterator,
count,
state.visitor);
}
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == config.bytesPerDim;
// System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert Arrays.compareUnsigned(
cellMinPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
<= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
assert Arrays.compareUnsigned(
cellMaxPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
>= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushLeft();
intersect(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(
splitPackedValue,
splitDim * config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushRight();
intersect(state, splitPackedValue, cellMaxPacked);
state.index.pop();
}
}
private long estimatePointCount(
IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
return 0L;
} else if (r == Relation.CELL_INSIDE_QUERY) {
return (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
} else if (state.index.isLeafNode()) {
// Assume half the points matched
return (config.maxPointsInLeafNode + 1) / 2;
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == config.bytesPerDim;
// System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert Arrays.compareUnsigned(
cellMinPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
<= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
assert Arrays.compareUnsigned(
cellMaxPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
>= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushLeft();
final long leftCost = estimatePointCount(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(
splitPackedValue,
splitDim * config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushRight();
final long rightCost = estimatePointCount(state, splitPackedValue, cellMaxPacked);
state.index.pop();
return leftCost + rightCost;
}
}
@Override
public byte[] getMinPackedValue() {
return minPackedValue.clone();
}
@Override
public byte[] getMaxPackedValue() {
return maxPackedValue.clone();
}
@Override
public int getNumDimensions() {
return config.numDims;
}
@Override
public int getNumIndexDimensions() {
return config.numIndexDims;
}
@Override
public int getBytesPerDimension() {
return config.bytesPerDim;
}
@Override
public long size() {
return pointCount;
}
@Override
public int getDocCount() {
return docCount;
}
public boolean isLeafNode(int nodeID) {
return nodeID >= leafNodeOffset;
}
/** Reusable {@link DocIdSetIterator} to handle low cardinality leaves. */
protected static class BKDReaderDocIDSetIterator extends DocIdSetIterator {
private int idx;
private int length;
private int offset;
private int docID;
final int[] docIDs;
public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) {
this.docIDs = new int[maxPointsInLeafNode];
}
@Override
public int docID() {
return docID;
}
private void reset(int offset, int length) {
this.offset = offset;
this.length = length;
assert offset + length <= docIDs.length;
this.docID = -1;
this.idx = 0;
}
@Override
public int nextDoc() throws IOException {
if (idx == length) {
docID = DocIdSetIterator.NO_MORE_DOCS;
} else {
docID = docIDs[offset + idx];
idx++;
}
return docID;
}
@Override
public int advance(int target) throws IOException {
return slowAdvance(target);
}
@Override
public long cost() {
return length;
}
}
}