/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FutureArrays;
import org.apache.lucene.util.MathUtil;
/** Handles intersection of a multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
*
* @lucene.experimental */
public final class BKDReader extends PointValues {
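// Usage sketch (illustration only, not part of the original sources): a points reader would
// typically open this class and drive it through the PointValues API, assuming metaIn,
// indexIn and dataIn are already positioned as documented on the constructor below:
//
//   PointValues values = new BKDReader(metaIn, indexIn, dataIn);
//   values.intersect(new PointValues.IntersectVisitor() {
//     @Override
//     public void visit(int docID) {
//       // cell fully inside the query: collect without filtering
//     }
//     @Override
//     public void visit(int docID, byte[] packedValue) {
//       // cell crosses the query: filter by the packed value
//     }
//     @Override
//     public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
//       return Relation.CELL_CROSSES_QUERY; // force per-value filtering in this sketch
//     }
//   });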
// Number of leaf nodes; node IDs at or above this offset in the full binary tree are leaves:
final int leafNodeOffset;
final BKDConfig config;
final int numLeaves;
final IndexInput in;
final byte[] minPackedValue;
final byte[] maxPackedValue;
final long pointCount;
final int docCount;
final int version;
final long minLeafBlockFP;
final IndexInput packedIndex;
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned.
* BKD tree is always stored off-heap. */
public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException {
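// Metadata layout, in the order read below: codec header, numDims, numIndexDims (only from
// VERSION_SELECTIVE_INDEXING on), maxPointsInLeafNode, bytesPerDim, numLeaves, minPackedValue,
// maxPackedValue, pointCount, docCount, numIndexBytes, then (only from VERSION_META_FILE on)
// minLeafBlockFP and the start pointer of the packed index.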
version = CodecUtil.checkHeader(metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
final int numDims = metaIn.readVInt();
final int numIndexDims;
if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
numIndexDims = metaIn.readVInt();
} else {
numIndexDims = numDims;
}
final int maxPointsInLeafNode = metaIn.readVInt();
final int bytesPerDim = metaIn.readVInt();
config = new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
// Read index:
numLeaves = metaIn.readVInt();
assert numLeaves > 0;
leafNodeOffset = numLeaves;
minPackedValue = new byte[config.packedIndexBytesLength];
maxPackedValue = new byte[config.packedIndexBytesLength];
metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
for(int dim=0;dim<config.numIndexDims;dim++) {
if (FutureArrays.compareUnsigned(minPackedValue, dim * config.bytesPerDim, dim * config.bytesPerDim + config.bytesPerDim, maxPackedValue, dim * config.bytesPerDim, dim * config.bytesPerDim + config.bytesPerDim) > 0) {
throw new CorruptIndexException("minPackedValue " + new BytesRef(minPackedValue) + " is > maxPackedValue " + new BytesRef(maxPackedValue) + " for dim=" + dim, metaIn);
}
}
pointCount = metaIn.readVLong();
docCount = metaIn.readVInt();
int numIndexBytes = metaIn.readVInt();
long indexStartPointer;
if (version >= BKDWriter.VERSION_META_FILE) {
minLeafBlockFP = metaIn.readLong();
indexStartPointer = metaIn.readLong();
} else {
indexStartPointer = indexIn.getFilePointer();
minLeafBlockFP = indexIn.readVLong();
indexIn.seek(indexStartPointer);
}
this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
this.in = dataIn;
}
long getMinLeafBlockFP() {
return minLeafBlockFP;
}
/** Used to walk the off-heap index. The format takes advantage of the limited
* access pattern to the BKD tree at search time, i.e. starting at the root
* node and recursing downwards one child at a time.
* @lucene.internal */
public class IndexTree implements Cloneable {
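// Traversal sketch (illustration only): intersect() and addAll() below walk the tree with this
// cursor roughly as follows; pushLeft()/pushRight() descend one child, pop() returns to the parent:
//
//   void walk(IndexTree index) {
//     if (index.isLeafNode()) {
//       if (index.nodeExists()) {
//         long fp = index.getLeafBlockFP(); // file pointer of this leaf's doc/value block
//       }
//     } else {
//       index.pushLeft();
//       walk(index);
//       index.pop();
//       index.pushRight();
//       walk(index);
//       index.pop();
//     }
//   }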
private int nodeID;
// level is 1-based so that we can do level-1 w/o checking each time:
private int level;
private int splitDim;
private final byte[][] splitPackedValueStack;
// used to read the packed tree off-heap
private final IndexInput in;
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
private final long[] leafBlockFPStack;
// holds the address, in the off-heap index, of the right-node of each level:
private final int[] rightNodePositions;
// holds the splitDim for each level:
private final int[] splitDims;
// true if the per-dim delta we read for the node at this level is a negative offset vs. the last split on this dim; this is a packed
// 2D array, i.e. to access array[level][dim] you read from negativeDeltas[level*numDims+dim]. this will be true if the last time we
// split on this dimension, we next pushed to the left sub-tree:
private final boolean[] negativeDeltas;
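// For example (illustration): with numIndexDims=2, the flag for level=3, dim=1 is stored at
// negativeDeltas[3 * 2 + 1] == negativeDeltas[7].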
// holds the packed per-level split values; the intersect method uses this to save the cell min/max as it recurses:
private final byte[][] splitValuesStack;
// scratch value to return from getPackedValue:
private final BytesRef scratch;
IndexTree() {
this(packedIndex.clone(), 1, 1);
// read root node
readNodeData(false);
}
private IndexTree(IndexInput in, int nodeID, int level) {
int treeDepth = getTreeDepth();
splitPackedValueStack = new byte[treeDepth+1][];
this.nodeID = nodeID;
this.level = level;
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
leafBlockFPStack = new long[treeDepth+1];
rightNodePositions = new int[treeDepth+1];
splitValuesStack = new byte[treeDepth+1][];
splitDims = new int[treeDepth+1];
negativeDeltas = new boolean[config.numIndexDims*(treeDepth+1)];
this.in = in;
splitValuesStack[0] = new byte[config.packedIndexBytesLength];
scratch = new BytesRef();
scratch.length = config.bytesPerDim;
}
public void pushLeft() {
nodeID *= 2;
level++;
readNodeData(true);
}
/** Clone, but you are not allowed to pop up past the point where the clone happened. */
@Override
public IndexTree clone() {
IndexTree index = new IndexTree(in.clone(), nodeID, level);
// copy node data
index.splitDim = splitDim;
index.leafBlockFPStack[level] = leafBlockFPStack[level];
index.rightNodePositions[level] = rightNodePositions[level];
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
System.arraycopy(negativeDeltas, level*config.numIndexDims, index.negativeDeltas, level*config.numIndexDims, config.numIndexDims);
index.splitDims[level] = splitDims[level];
return index;
}
public void pushRight() {
final int nodePosition = rightNodePositions[level];
assert nodePosition >= in.getFilePointer() : "nodePosition = " + nodePosition + " < currentPosition=" + in.getFilePointer();
nodeID = nodeID * 2 + 1;
level++;
try {
in.seek(nodePosition);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
readNodeData(false);
}
public void pop() {
nodeID /= 2;
level--;
splitDim = splitDims[level];
//System.out.println(" pop nodeID=" + nodeID);
}
public boolean isLeafNode() {
return nodeID >= leafNodeOffset;
}
public boolean nodeExists() {
return nodeID - leafNodeOffset < leafNodeOffset;
}
public int getNodeID() {
return nodeID;
}
public byte[] getSplitPackedValue() {
assert isLeafNode() == false;
assert splitPackedValueStack[level] != null: "level=" + level;
return splitPackedValueStack[level];
}
/** Only valid after pushLeft or pushRight, not pop! */
public int getSplitDim() {
assert isLeafNode() == false;
return splitDim;
}
/** Only valid after pushLeft or pushRight, not pop! */
public BytesRef getSplitDimValue() {
assert isLeafNode() == false;
scratch.bytes = splitValuesStack[level];
scratch.offset = splitDim * config.bytesPerDim;
return scratch;
}
/** Only valid after pushLeft or pushRight, not pop! */
public long getLeafBlockFP() {
assert isLeafNode(): "nodeID=" + nodeID + " is not a leaf";
return leafBlockFPStack[level];
}
/** Return the number of leaves below the current node. */
public int getNumLeaves() {
int leftMostLeafNode = nodeID;
while (leftMostLeafNode < leafNodeOffset) {
leftMostLeafNode = leftMostLeafNode * 2;
}
int rightMostLeafNode = nodeID;
while (rightMostLeafNode < leafNodeOffset) {
rightMostLeafNode = rightMostLeafNode * 2 + 1;
}
final int numLeaves;
if (rightMostLeafNode >= leftMostLeafNode) {
// both are on the same level
numLeaves = rightMostLeafNode - leftMostLeafNode + 1;
} else {
// left is one level deeper than right
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
}
assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + getNumLeavesSlow(nodeID);
return numLeaves;
}
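// Worked example for getNumLeaves() (illustration): with leafNodeOffset=5 the leaves are nodes
// 5..9. For nodeID=2 the left-most leaf is 8 (2 -> 4 -> 8) and the right-most leaf is 5 (2 -> 5);
// since 5 < 8 the left branch is one level deeper, so numLeaves = 5 - 8 + 1 + 5 = 3.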
// for assertions
private int getNumLeavesSlow(int node) {
if (node >= 2 * leafNodeOffset) {
return 0;
} else if (node >= leafNodeOffset) {
return 1;
} else {
final int leftCount = getNumLeavesSlow(node * 2);
final int rightCount = getNumLeavesSlow(node * 2 + 1);
return leftCount + rightCount;
}
}
private void readNodeData(boolean isLeft) {
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(negativeDeltas, (level-1)*config.numIndexDims, negativeDeltas, level*config.numIndexDims, config.numIndexDims);
assert splitDim != -1;
negativeDeltas[level*config.numIndexDims+splitDim] = isLeft;
try {
leafBlockFPStack[level] = leafBlockFPStack[level - 1];
// read leaf block FP delta
if (isLeft == false) {
leafBlockFPStack[level] += in.readVLong();
}
if (isLeafNode()) {
splitDim = -1;
} else {
// read split dim, prefix, firstDiffByteDelta encoded as int:
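// Decode example (illustration, assuming numIndexDims=2 and bytesPerDim=4): code=13 yields
// splitDim = 13 % 2 = 1, then 13 / 2 = 6, prefix = 6 % 5 = 1, firstDiffByteDelta = 6 / 5 = 1;
// the delta adjusts the first byte after the prefix and the remaining suffix-1 = 2 bytes are
// read verbatim below.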
int code = in.readVInt();
splitDim = code % config.numIndexDims;
splitDims[level] = splitDim;
code /= config.numIndexDims;
int prefix = code % (1 + config.bytesPerDim);
int suffix = config.bytesPerDim - prefix;
if (splitValuesStack[level] == null) {
splitValuesStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(splitValuesStack[level - 1], 0, splitValuesStack[level], 0, config.packedIndexBytesLength);
if (suffix > 0) {
int firstDiffByteDelta = code / (1 + config.bytesPerDim);
if (negativeDeltas[level * config.numIndexDims + splitDim]) {
firstDiffByteDelta = -firstDiffByteDelta;
}
int oldByte = splitValuesStack[level][splitDim * config.bytesPerDim + prefix] & 0xFF;
splitValuesStack[level][splitDim * config.bytesPerDim + prefix] = (byte) (oldByte + firstDiffByteDelta);
in.readBytes(splitValuesStack[level], splitDim * config.bytesPerDim + prefix + 1, suffix - 1);
} else {
// our split value is == last split value in this dim, which can happen when there are many duplicate values
}
int leftNumBytes;
if (nodeID * 2 < leafNodeOffset) {
leftNumBytes = in.readVInt();
} else {
leftNumBytes = 0;
}
rightNodePositions[level] = Math.toIntExact(in.getFilePointer()) + leftNumBytes;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}
private int getTreeDepth() {
// First +1 because all the non-leaf nodes make another power
// of 2; e.g. to have a fully balanced tree with 4 leaves you
// need a depth=3 tree:
// Second +1 because MathUtil.log computes floor of the logarithm; e.g.
// with 5 leaves you need a depth=4 tree:
return MathUtil.log(numLeaves, 2) + 2;
}
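// Example (illustration): numLeaves=5 gives MathUtil.log(5, 2) = 2, so getTreeDepth() returns 4,
// matching the depth=4 tree mentioned above; for exact powers of two (e.g. 4 leaves) the result
// is one larger than strictly needed, which only means the per-level stacks are slightly oversized.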
/** Used to track all state for a single call to {@link #intersect}. */
public static final class IntersectState {
final IndexInput in;
final BKDReaderDocIDSetIterator scratchIterator;
final byte[] scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue;
final int[] commonPrefixLengths;
final IntersectVisitor visitor;
public final IndexTree index;
public IntersectState(IndexInput in,
BKDConfig config,
IntersectVisitor visitor,
IndexTree indexVisitor) {
this.in = in;
this.visitor = visitor;
this.commonPrefixLengths = new int[config.numDims];
this.scratchIterator = new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode);
this.scratchDataPackedValue = new byte[config.packedBytesLength];
this.scratchMinIndexPackedValue = new byte[config.packedIndexBytesLength];
this.scratchMaxIndexPackedValue = new byte[config.packedIndexBytesLength];
this.index = indexVisitor;
}
}
@Override
public void intersect(IntersectVisitor visitor) throws IOException {
intersect(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatePointCount(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
private void addAll(IntersectState state, boolean grown) throws IOException {
//System.out.println("R: addAll nodeID=" + nodeID);
if (grown == false) {
final long maxPointCount = (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
if (maxPointCount <= Integer.MAX_VALUE) { // could be >MAX_VALUE if there are more than 2B points in total
state.visitor.grow((int) maxPointCount);
grown = true;
}
}
if (state.index.isLeafNode()) {
assert grown;
//System.out.println("ADDALL");
if (state.index.nodeExists()) {
visitDocIDs(state.in, state.index.getLeafBlockFP(), state.visitor);
}
// TODO: we can assert that the first value here in fact matches what the index claimed?
} else {
state.index.pushLeft();
addAll(state, grown);
state.index.pop();
state.index.pushRight();
addAll(state, grown);
state.index.pop();
}
}
/** Create a new {@link IntersectState} */
public IntersectState getIntersectState(IntersectVisitor visitor) {
IndexTree index = new IndexTree();
return new IntersectState(in.clone(), config, visitor, index);
}
/** Visits all docIDs and packed values in a single leaf block */
public void visitLeafBlockValues(IndexTree index, IntersectState state) throws IOException {
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, index.getLeafBlockFP(), state.scratchIterator);
// Again, this time reading values and checking with the visitor
visitDocValues(state.commonPrefixLengths, state.scratchDataPackedValue, state.scratchMinIndexPackedValue, state.scratchMaxIndexPackedValue, state.in, state.scratchIterator, count, state.visitor);
}
private void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
// Leaf node
in.seek(blockFP);
// How many points are stored in this leaf cell:
int count = in.readVInt();
// No need to call grow(), it has been called up-front
DocIdsWriter.readInts(in, count, visitor);
}
int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator iterator) throws IOException {
in.seek(blockFP);
// How many points are stored in this leaf cell:
int count = in.readVInt();
DocIdsWriter.readInts(in, count, iterator.docIDs);
return count;
}
void visitDocValues(int[] commonPrefixLengths, byte[] scratchDataPackedValue, byte[] scratchMinIndexPackedValue, byte[] scratchMaxIndexPackedValue,
IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor) throws IOException {
if (version >= BKDWriter.VERSION_LOW_CARDINALITY_LEAVES) {
visitDocValuesWithCardinality(commonPrefixLengths, scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue, in, scratchIterator, count, visitor);
} else {
visitDocValuesNoCardinality(commonPrefixLengths, scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue, in, scratchIterator, count, visitor);
}
}
void visitDocValuesNoCardinality(int[] commonPrefixLengths, byte[] scratchDataPackedValue, byte[] scratchMinIndexPackedValue, byte[] scratchMaxIndexPackedValue,
IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor) throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
if (config.numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us a range of values for each dimension, but the actual range of values
// might be much narrower than what the index told us, so we double-check the relation
// here, which is cheap yet might help figure out that the block either entirely matches
// or does not match at all. This is especially likely when multiple dimensions are
// correlated, i.e. splitting on one dimension also significantly changes the range of
// values in another dimension.
Relation r = visitor.compare(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
}
visitor.grow(count);
if (r == Relation.CELL_INSIDE_QUERY) {
for (int i = 0; i < count; ++i) {
visitor.visit(scratchIterator.docIDs[i]);
}
return;
}
} else {
visitor.grow(count);
}
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
visitCompressedDocValues(commonPrefixLengths, scratchDataPackedValue, in, scratchIterator, count, visitor, compressedDim);
}
}
void visitDocValuesWithCardinality(int[] commonPrefixLengths, byte[] scratchDataPackedValue, byte[] scratchMinIndexPackedValue, byte[] scratchMaxIndexPackedValue,
IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor) throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
// all values are the same
visitor.grow(count);
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
if (config.numIndexDims != 1) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us a range of values for each dimension, but the actual range of values
// might be much narrower than what the index told us, so we double-check the relation
// here, which is cheap yet might help figure out that the block either entirely matches
// or does not match at all. This is especially likely when multiple dimensions are
// correlated, i.e. splitting on one dimension also significantly changes the range of
// values in another dimension.
Relation r = visitor.compare(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
}
visitor.grow(count);
if (r == Relation.CELL_INSIDE_QUERY) {
for (int i = 0; i < count; ++i) {
visitor.visit(scratchIterator.docIDs[i]);
}
return;
}
} else {
visitor.grow(count);
}
if (compressedDim == -2) {
// low cardinality values
visitSparseRawDocValues(commonPrefixLengths, scratchDataPackedValue, in, scratchIterator, count, visitor);
} else {
// high cardinality
visitCompressedDocValues(commonPrefixLengths, scratchDataPackedValue, in, scratchIterator, count, visitor, compressedDim);
}
}
}
private void readMinMax(int[] commonPrefixLengths, byte[] minPackedValue, byte[] maxPackedValue, IndexInput in) throws IOException {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(minPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
in.readBytes(maxPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
}
// Each sub-block stores its cardinality (run length) followed by a single packed value shared by all docs in the run:
private void visitSparseRawDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor) throws IOException {
int i;
for (i = 0; i < count;) {
int length = in.readVInt();
for(int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
scratchIterator.reset(i, length);
visitor.visit(scratchIterator, scratchPackedValue);
i += length;
}
if (i != count) {
throw new CorruptIndexException("Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
// all points in the leaf share the same packed value, which is fully described by the common prefixes
private void visitUniqueRawDocValues(byte[] scratchPackedValue, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor) throws IOException {
scratchIterator.reset(0, count);
visitor.visit(scratchIterator, scratchPackedValue);
}
private void visitCompressedDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor, int compressedDim) throws IOException {
// the byte at `compressedByteOffset` is compressed using run-length compression,
// other suffix bytes are stored verbatim
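// e.g. (illustration) a run of 3 points sharing the byte 0x7f at that offset is stored as
// [0x7f, 0x03, <remaining suffix bytes of point 1>, <point 2>, <point 3>].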
final int compressedByteOffset = compressedDim * config.bytesPerDim + commonPrefixLengths[compressedDim];
commonPrefixLengths[compressedDim]++;
int i;
for (i = 0; i < count; ) {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
for(int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
visitor.visit(scratchIterator.docIDs[i+j], scratchPackedValue);
}
i += runLen;
}
if (i != count) {
throw new CorruptIndexException("Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
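// Sentinel values returned by readCompressedDim(): -1 means every point in the leaf carries the
// same packed value, -2 (only valid from VERSION_LOW_CARDINALITY_LEAVES on) selects the
// low-cardinality encoding, and a value in [0, numDims) names the dimension whose first suffix
// byte is run-length compressed.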
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
if (compressedDim < -2 || compressedDim >= config.numDims || (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
throw new CorruptIndexException("Got compressedDim="+compressedDim, in);
}
return compressedDim;
}
private void readCommonPrefixes(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
for(int dim=0;dim<config.numDims;dim++) {
int prefix = in.readVInt();
commonPrefixLengths[dim] = prefix;
if (prefix > 0) {
in.readBytes(scratchPackedValue, dim*config.bytesPerDim, prefix);
}
//System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
}
}
private void intersect(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
} else if (r == Relation.CELL_INSIDE_QUERY) {
// This cell is fully inside of the query shape: recursively add all points in this cell without filtering
addAll(state, false);
// Otherwise (CELL_CROSSES_QUERY) the cell crosses the shape boundary, or the cell fully contains the query, so we fall through to the cases below and do full filtering:
} else if (state.index.isLeafNode()) {
// TODO: we can assert that the first value here in fact matches what the index claimed?
// In the unbalanced case it's possible the left most node only has one child:
if (state.index.nodeExists()) {
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, state.index.getLeafBlockFP(), state.scratchIterator);
// Again, this time reading values and checking with the visitor
visitDocValues(state.commonPrefixLengths, state.scratchDataPackedValue, state.scratchMinIndexPackedValue, state.scratchMaxIndexPackedValue, state.in, state.scratchIterator, count, state.visitor);
}
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == config.bytesPerDim;
//System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert FutureArrays.compareUnsigned(cellMinPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) <= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
assert FutureArrays.compareUnsigned(cellMaxPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) >= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushLeft();
intersect(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(splitPackedValue, splitDim*config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushRight();
intersect(state, splitPackedValue, cellMaxPacked);
state.index.pop();
}
}
private long estimatePointCount(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
return 0L;
} else if (r == Relation.CELL_INSIDE_QUERY) {
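// Upper bound: assume every leaf below this node holds config.maxPointsInLeafNode points.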
return (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
} else if (state.index.isLeafNode()) {
// Assume half the points matched
return (config.maxPointsInLeafNode + 1) / 2;
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == config.bytesPerDim;
//System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert FutureArrays.compareUnsigned(cellMinPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) <= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
assert FutureArrays.compareUnsigned(cellMaxPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) >= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushLeft();
final long leftCost = estimatePointCount(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(splitPackedValue, splitDim*config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushRight();
final long rightCost = estimatePointCount(state, splitPackedValue, cellMaxPacked);
state.index.pop();
return leftCost + rightCost;
}
}
@Override
public byte[] getMinPackedValue() {
return minPackedValue.clone();
}
@Override
public byte[] getMaxPackedValue() {
return maxPackedValue.clone();
}
@Override
public int getNumDimensions() {
return config.numDims;
}
@Override
public int getNumIndexDimensions() {
return config.numIndexDims;
}
@Override
public int getBytesPerDimension() {
return config.bytesPerDim;
}
@Override
public long size() {
return pointCount;
}
@Override
public int getDocCount() {
return docCount;
}
public boolean isLeafNode(int nodeID) {
return nodeID >= leafNodeOffset;
}
/**
* Reusable {@link DocIdSetIterator} to handle low cardinality leaves. */
protected static class BKDReaderDocIDSetIterator extends DocIdSetIterator {
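// Iterates the window docIDs[offset, offset + length) configured via reset(); leaves hand their
// documents to IntersectVisitor.visit(DocIdSetIterator, byte[]) through this iterator.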
private int idx;
private int length;
private int offset;
private int docID;
final int[] docIDs;
public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) {
this.docIDs = new int[maxPointsInLeafNode];
}
@Override
public int docID() {
return docID;
}
private void reset(int offset, int length) {
this.offset = offset;
this.length = length;
assert offset + length <= docIDs.length;
this.docID = -1;
this.idx = 0;
}
@Override
public int nextDoc() throws IOException {
if (idx == length) {
docID = DocIdSetIterator.NO_MORE_DOCS;
} else {
docID = docIDs[offset + idx];
idx++;
}
return docID;
}
@Override
public int advance(int target) throws IOException {
return slowAdvance(target);
}
@Override
public long cost() {
return length;
}
}
}