/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MathUtil;
/**
* Handles intersection of a multi-dimensional shape in byte[] space with a block KD-tree
* previously written with {@link BKDWriter}.
*
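* <p>A minimal usage sketch (illustrative only: {@code metaIn}, {@code indexIn} and {@code dataIn}
* stand for {@link IndexInput}s already positioned as described on the constructor, and the
* anonymous visitor is a placeholder for a real query shape):
*
* <pre>{@code
* BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
* reader.intersect(new PointValues.IntersectVisitor() {
*   public void visit(int docID) {
*     // the current cell is fully inside the query: collect docID unconditionally
*   }
*
*   public void visit(int docID, byte[] packedValue) {
*     // the current cell crosses the query: test packedValue, then collect docID if it matches
*   }
*
*   public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
*     // relate the cell's bounding box to the query shape
*     return PointValues.Relation.CELL_CROSSES_QUERY;
*   }
* });
* }</pre>
*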
* @lucene.experimental
*/
public final class BKDReader extends PointValues {
// Number of leaf nodes; node IDs >= leafNodeOffset denote leaves of the full binary tree:
final int leafNodeOffset;
final BKDConfig config;
final int numLeaves;
final IndexInput in;
final byte[] minPackedValue;
final byte[] maxPackedValue;
final long pointCount;
final int docCount;
final int version;
final long minLeafBlockFP;
final IndexInput packedIndex;
/**
* Caller must pre-seek the provided {@link IndexInput} to the index location that {@link
* BKDWriter#finish} returned. BKD tree is always stored off-heap.
*/
public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException {
version =
CodecUtil.checkHeader(
metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
final int numDims = metaIn.readVInt();
final int numIndexDims;
if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
numIndexDims = metaIn.readVInt();
} else {
numIndexDims = numDims;
}
final int maxPointsInLeafNode = metaIn.readVInt();
final int bytesPerDim = metaIn.readVInt();
config = new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
// Read index:
numLeaves = metaIn.readVInt();
assert numLeaves > 0;
leafNodeOffset = numLeaves;
minPackedValue = new byte[config.packedIndexBytesLength];
maxPackedValue = new byte[config.packedIndexBytesLength];
metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
for (int dim = 0; dim < config.numIndexDims; dim++) {
if (Arrays.compareUnsigned(
minPackedValue,
dim * config.bytesPerDim,
dim * config.bytesPerDim + config.bytesPerDim,
maxPackedValue,
dim * config.bytesPerDim,
dim * config.bytesPerDim + config.bytesPerDim)
> 0) {
throw new CorruptIndexException(
"minPackedValue "
+ new BytesRef(minPackedValue)
+ " is > maxPackedValue "
+ new BytesRef(maxPackedValue)
+ " for dim="
+ dim,
metaIn);
}
}
pointCount = metaIn.readVLong();
docCount = metaIn.readVInt();
int numIndexBytes = metaIn.readVInt();
long indexStartPointer;
if (version >= BKDWriter.VERSION_META_FILE) {
minLeafBlockFP = metaIn.readLong();
indexStartPointer = metaIn.readLong();
} else {
indexStartPointer = indexIn.getFilePointer();
minLeafBlockFP = indexIn.readVLong();
indexIn.seek(indexStartPointer);
}
this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
this.in = dataIn;
}
long getMinLeafBlockFP() {
return minLeafBlockFP;
}
/**
* Used to walk the off-heap index. The format takes advantage of the limited access pattern to
* the BKD tree at search time, i.e. starting at the root node and recursing downwards one child
* at a time.
*
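* <p>A sketch of the intended root-to-leaf traversal (illustrative only; {@code walk} is a
* hypothetical helper and {@code tree} an {@code IndexTree} obtained from this reader):
*
* <pre>{@code
* void walk(BKDReader.IndexTree tree) {
*   if (tree.isLeafNode()) {
*     if (tree.nodeExists()) {
*       long fp = tree.getLeafBlockFP(); // file pointer of this leaf's block in the data file
*     }
*   } else {
*     tree.pushLeft();
*     walk(tree);
*     tree.pop();
*     tree.pushRight();
*     walk(tree);
*     tree.pop();
*   }
* }
* }</pre>
*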
* @lucene.internal
*/
public class IndexTree implements Cloneable {
private int nodeID;
// level is 1-based so that we can do level-1 w/o checking each time:
private int level;
private int splitDim;
private final byte[][] splitPackedValueStack;
// used to read the packed tree off-heap
private final IndexInput in;
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
private final long[] leafBlockFPStack;
// holds the address, in the off-heap index, of the right-node of each level:
private final int[] rightNodePositions;
// holds the splitDim for each level:
private final int[] splitDims;
// true if the per-dim delta we read for the node at this level is a negative offset vs. the
// last split on this dim; this is a packed 2D array, i.e. to access array[level][dim] you read
// from negativeDeltas[level*numDims+dim]. This will be true if the last time we split on this
// dimension, we next pushed to the left sub-tree:
private final boolean[] negativeDeltas;
// holds the packed per-level split values; the intersect method uses this to save the cell
// min/max as it recurses:
private final byte[][] splitValuesStack;
// scratch value to return from getPackedValue:
private final BytesRef scratch;
IndexTree() {
this(packedIndex.clone(), 1, 1);
// read root node
readNodeData(false);
}
private IndexTree(IndexInput in, int nodeID, int level) {
int treeDepth = getTreeDepth();
splitPackedValueStack = new byte[treeDepth + 1][];
this.nodeID = nodeID;
this.level = level;
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
leafBlockFPStack = new long[treeDepth + 1];
rightNodePositions = new int[treeDepth + 1];
splitValuesStack = new byte[treeDepth + 1][];
splitDims = new int[treeDepth + 1];
negativeDeltas = new boolean[config.numIndexDims * (treeDepth + 1)];
this.in = in;
splitValuesStack[0] = new byte[config.packedIndexBytesLength];
scratch = new BytesRef();
scratch.length = config.bytesPerDim;
}
public void pushLeft() {
nodeID *= 2;
level++;
readNodeData(true);
}
/** Clone, but you are not allowed to pop up past the point where the clone happened. */
@Override
public IndexTree clone() {
IndexTree index = new IndexTree(in.clone(), nodeID, level);
// copy node data
index.splitDim = splitDim;
index.leafBlockFPStack[level] = leafBlockFPStack[level];
index.rightNodePositions[level] = rightNodePositions[level];
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
System.arraycopy(
negativeDeltas,
level * config.numIndexDims,
index.negativeDeltas,
level * config.numIndexDims,
config.numIndexDims);
index.splitDims[level] = splitDims[level];
return index;
}
public void pushRight() {
final int nodePosition = rightNodePositions[level];
assert nodePosition >= in.getFilePointer()
: "nodePosition = " + nodePosition + " < currentPosition=" + in.getFilePointer();
nodeID = nodeID * 2 + 1;
level++;
try {
in.seek(nodePosition);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
readNodeData(false);
}
public void pop() {
nodeID /= 2;
level--;
splitDim = splitDims[level];
// System.out.println(" pop nodeID=" + nodeID);
}
public boolean isLeafNode() {
return nodeID >= leafNodeOffset;
}
public boolean nodeExists() {
return nodeID - leafNodeOffset < leafNodeOffset;
}
public int getNodeID() {
return nodeID;
}
public byte[] getSplitPackedValue() {
assert isLeafNode() == false;
assert splitPackedValueStack[level] != null : "level=" + level;
return splitPackedValueStack[level];
}
/** Only valid after pushLeft or pushRight, not pop! */
public int getSplitDim() {
assert isLeafNode() == false;
return splitDim;
}
/** Only valid after pushLeft or pushRight, not pop! */
public BytesRef getSplitDimValue() {
assert isLeafNode() == false;
scratch.bytes = splitValuesStack[level];
scratch.offset = splitDim * config.bytesPerDim;
return scratch;
}
/** Only valid after pushLeft or pushRight, not pop! */
public long getLeafBlockFP() {
assert isLeafNode() : "nodeID=" + nodeID + " is not a leaf";
return leafBlockFPStack[level];
}
/** Return the number of leaves below the current node. */
public int getNumLeaves() {
int leftMostLeafNode = nodeID;
while (leftMostLeafNode < leafNodeOffset) {
leftMostLeafNode = leftMostLeafNode * 2;
}
int rightMostLeafNode = nodeID;
while (rightMostLeafNode < leafNodeOffset) {
rightMostLeafNode = rightMostLeafNode * 2 + 1;
}
final int numLeaves;
if (rightMostLeafNode >= leftMostLeafNode) {
// both are on the same level
numLeaves = rightMostLeafNode - leftMostLeafNode + 1;
} else {
// left is one level deeper than right
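// e.g. leafNodeOffset=5, nodeID=2: leftMostLeafNode=8, rightMostLeafNode=5, so 5 - 8 + 1 + 5 = 3 leaves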
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
}
assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + getNumLeavesSlow(nodeID);
return numLeaves;
}
// for assertions
private int getNumLeavesSlow(int node) {
if (node >= 2 * leafNodeOffset) {
return 0;
} else if (node >= leafNodeOffset) {
return 1;
} else {
final int leftCount = getNumLeavesSlow(node * 2);
final int rightCount = getNumLeavesSlow(node * 2 + 1);
return leftCount + rightCount;
}
}
private void readNodeData(boolean isLeft) {
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(
negativeDeltas,
(level - 1) * config.numIndexDims,
negativeDeltas,
level * config.numIndexDims,
config.numIndexDims);
assert splitDim != -1;
negativeDeltas[level * config.numIndexDims + splitDim] = isLeft;
try {
leafBlockFPStack[level] = leafBlockFPStack[level - 1];
// read leaf block FP delta
if (isLeft == false) {
leafBlockFPStack[level] += in.readVLong();
}
if (isLeafNode()) {
splitDim = -1;
} else {
// read split dim, prefix, firstDiffByteDelta encoded as int:
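// i.e. code == ((firstDiffByteDelta * (1 + bytesPerDim)) + prefix) * numIndexDims + splitDim,
// which the remainders/divisions below undo in reverse order: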
int code = in.readVInt();
splitDim = code % config.numIndexDims;
splitDims[level] = splitDim;
code /= config.numIndexDims;
int prefix = code % (1 + config.bytesPerDim);
int suffix = config.bytesPerDim - prefix;
if (splitValuesStack[level] == null) {
splitValuesStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(
splitValuesStack[level - 1],
0,
splitValuesStack[level],
0,
config.packedIndexBytesLength);
if (suffix > 0) {
int firstDiffByteDelta = code / (1 + config.bytesPerDim);
if (negativeDeltas[level * config.numIndexDims + splitDim]) {
firstDiffByteDelta = -firstDiffByteDelta;
}
int oldByte = splitValuesStack[level][splitDim * config.bytesPerDim + prefix] & 0xFF;
splitValuesStack[level][splitDim * config.bytesPerDim + prefix] =
(byte) (oldByte + firstDiffByteDelta);
in.readBytes(
splitValuesStack[level], splitDim * config.bytesPerDim + prefix + 1, suffix - 1);
} else {
// our split value is == last split value in this dim, which can happen when there are
// many duplicate values
}
int leftNumBytes;
if (nodeID * 2 < leafNodeOffset) {
leftNumBytes = in.readVInt();
} else {
leftNumBytes = 0;
}
rightNodePositions[level] = Math.toIntExact(in.getFilePointer()) + leftNumBytes;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}
private int getTreeDepth() {
// First +1 because all the non-leaf nodes make another power
// of 2; e.g. to have a fully balanced tree with 4 leaves you
// need a depth=3 tree:
// Second +1 because MathUtil.log computes floor of the logarithm; e.g.
// with 5 leaves you need a depth=4 tree:
return MathUtil.log(numLeaves, 2) + 2;
}
/** Used to track all state for a single call to {@link #intersect}. */
public static final class IntersectState {
final IndexInput in;
final BKDReaderDocIDSetIterator scratchIterator;
final byte[] scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue;
final int[] commonPrefixLengths;
final IntersectVisitor visitor;
public final IndexTree index;
public IntersectState(
IndexInput in, BKDConfig config, IntersectVisitor visitor, IndexTree indexVisitor) {
this.in = in;
this.visitor = visitor;
this.commonPrefixLengths = new int[config.numDims];
this.scratchIterator = new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode);
this.scratchDataPackedValue = new byte[config.packedBytesLength];
this.scratchMinIndexPackedValue = new byte[config.packedIndexBytesLength];
this.scratchMaxIndexPackedValue = new byte[config.packedIndexBytesLength];
this.index = indexVisitor;
}
}
@Override
public void intersect(IntersectVisitor visitor) throws IOException {
intersect(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatePointCount(getIntersectState(visitor), minPackedValue, maxPackedValue);
}
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
private void addAll(IntersectState state, boolean grown) throws IOException {
// System.out.println("R: addAll nodeID=" + nodeID);
if (grown == false) {
final long maxPointCount = (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
if (maxPointCount
<= Integer.MAX_VALUE) { // could be >MAX_VALUE if there are more than 2B points in total
state.visitor.grow((int) maxPointCount);
grown = true;
}
}
if (state.index.isLeafNode()) {
assert grown;
// System.out.println("ADDALL");
if (state.index.nodeExists()) {
visitDocIDs(state.in, state.index.getLeafBlockFP(), state.visitor);
}
// TODO: we can assert that the first value here in fact matches what the index claimed?
} else {
state.index.pushLeft();
addAll(state, grown);
state.index.pop();
state.index.pushRight();
addAll(state, grown);
state.index.pop();
}
}
/** Create a new {@link IntersectState} */
public IntersectState getIntersectState(IntersectVisitor visitor) {
IndexTree index = new IndexTree();
return new IntersectState(in.clone(), config, visitor, index);
}
/** Visits all docIDs and packed values in a single leaf block */
public void visitLeafBlockValues(IndexTree index, IntersectState state) throws IOException {
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, index.getLeafBlockFP(), state.scratchIterator);
// Again, this time reading values and checking with the visitor
visitDocValues(
state.commonPrefixLengths,
state.scratchDataPackedValue,
state.scratchMinIndexPackedValue,
state.scratchMaxIndexPackedValue,
state.in,
state.scratchIterator,
count,
state.visitor);
}
private void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor)
throws IOException {
// Leaf node
in.seek(blockFP);
// How many points are stored in this leaf cell:
int count = in.readVInt();
// No need to call grow(), it has been called up-front
DocIdsWriter.readInts(in, count, visitor);
}
int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator iterator)
throws IOException {
in.seek(blockFP);
// How many points are stored in this leaf cell:
int count = in.readVInt();
DocIdsWriter.readInts(in, count, iterator.docIDs);
return count;
}
void visitDocValues(
int[] commonPrefixLengths,
byte[] scratchDataPackedValue,
byte[] scratchMinIndexPackedValue,
byte[] scratchMaxIndexPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
if (version >= BKDWriter.VERSION_LOW_CARDINALITY_LEAVES) {
visitDocValuesWithCardinality(
commonPrefixLengths,
scratchDataPackedValue,
scratchMinIndexPackedValue,
scratchMaxIndexPackedValue,
in,
scratchIterator,
count,
visitor);
} else {
visitDocValuesNoCardinality(
commonPrefixLengths,
scratchDataPackedValue,
scratchMinIndexPackedValue,
scratchMaxIndexPackedValue,
in,
scratchIterator,
count,
visitor);
}
}
void visitDocValuesNoCardinality(
int[] commonPrefixLengths,
byte[] scratchDataPackedValue,
byte[] scratchMinIndexPackedValue,
byte[] scratchMaxIndexPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
if (config.numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us a range of values for each dimension, but the actual range of values
// might be much narrower than what the index told us, so we double-check the relation
// here, which is cheap yet might help figure out that the block either entirely matches
// or does not match at all. This is especially likely when multiple dimensions are
// correlated, i.e. splitting on one dimension also significantly changes the range of
// values in another dimension.
Relation r = visitor.compare(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
}
visitor.grow(count);
if (r == Relation.CELL_INSIDE_QUERY) {
for (int i = 0; i < count; ++i) {
visitor.visit(scratchIterator.docIDs[i]);
}
return;
}
} else {
visitor.grow(count);
}
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
visitCompressedDocValues(
commonPrefixLengths,
scratchDataPackedValue,
in,
scratchIterator,
count,
visitor,
compressedDim);
}
}
void visitDocValuesWithCardinality(
int[] commonPrefixLengths,
byte[] scratchDataPackedValue,
byte[] scratchMinIndexPackedValue,
byte[] scratchMaxIndexPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
// all values are the same
visitor.grow(count);
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
if (config.numIndexDims != 1) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(
scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us a range of values for each dimension, but the actual range of values
// might be much narrower than what the index told us, so we double-check the relation
// here, which is cheap yet might help figure out that the block either entirely matches
// or does not match at all. This is especially likely when multiple dimensions are
// correlated, i.e. splitting on one dimension also significantly changes the range of
// values in another dimension.
Relation r = visitor.compare(minPackedValue, maxPackedValue);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
}
visitor.grow(count);
if (r == Relation.CELL_INSIDE_QUERY) {
for (int i = 0; i < count; ++i) {
visitor.visit(scratchIterator.docIDs[i]);
}
return;
}
} else {
visitor.grow(count);
}
if (compressedDim == -2) {
// low cardinality values
visitSparseRawDocValues(
commonPrefixLengths, scratchDataPackedValue, in, scratchIterator, count, visitor);
} else {
// high cardinality
visitCompressedDocValues(
commonPrefixLengths,
scratchDataPackedValue,
in,
scratchIterator,
count,
visitor,
compressedDim);
}
}
}
private void readMinMax(
int[] commonPrefixLengths, byte[] minPackedValue, byte[] maxPackedValue, IndexInput in)
throws IOException {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(minPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
in.readBytes(maxPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
}
// For each run: read its cardinality (run length) and its packed value, then visit that many
// consecutive docIDs
private void visitSparseRawDocValues(
int[] commonPrefixLengths,
byte[] scratchPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
int i;
for (i = 0; i < count; ) {
int length = in.readVInt();
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(
scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
scratchIterator.reset(i, length);
visitor.visit(scratchIterator, scratchPackedValue);
i += length;
}
if (i != count) {
throw new CorruptIndexException(
"Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
// All points in this leaf share a single value, which is fully covered by the common prefix
private void visitUniqueRawDocValues(
byte[] scratchPackedValue,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor)
throws IOException {
scratchIterator.reset(0, count);
visitor.visit(scratchIterator, scratchPackedValue);
}
private void visitCompressedDocValues(
int[] commonPrefixLengths,
byte[] scratchPackedValue,
IndexInput in,
BKDReaderDocIDSetIterator scratchIterator,
int count,
IntersectVisitor visitor,
int compressedDim)
throws IOException {
// the byte at `compressedByteOffset` is compressed using run-length compression,
// other suffix bytes are stored verbatim
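// on-disk layout per run: [1 byte: value at compressedByteOffset][1 byte: runLen]
// followed by runLen x per-dim suffix bytes, as read in the loop below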
final int compressedByteOffset =
compressedDim * config.bytesPerDim + commonPrefixLengths[compressedDim];
commonPrefixLengths[compressedDim]++;
int i;
for (i = 0; i < count; ) {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(
scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
visitor.visit(scratchIterator.docIDs[i + j], scratchPackedValue);
}
i += runLen;
}
if (i != count) {
throw new CorruptIndexException(
"Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
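// Reads which dimension is run-length compressed in this leaf: -1 means all points share the
// same value (fully given by the common prefixes), -2 means the low-cardinality encoding is
// used (only valid from VERSION_LOW_CARDINALITY_LEAVES on), and values >= 0 name the
// compressed dimension.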
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
if (compressedDim < -2
|| compressedDim >= config.numDims
|| (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
throw new CorruptIndexException("Got compressedDim=" + compressedDim, in);
}
return compressedDim;
}
private void readCommonPrefixes(
int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = in.readVInt();
commonPrefixLengths[dim] = prefix;
if (prefix > 0) {
in.readBytes(scratchPackedValue, dim * config.bytesPerDim, prefix);
}
// System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
}
}
private void intersect(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked)
throws IOException {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
} else if (r == Relation.CELL_INSIDE_QUERY) {
// This cell is fully inside of the query shape: recursively add all points in this cell
// without filtering
addAll(state, false);
// The cell crosses the shape boundary, or the cell fully contains the query, so we fall
// through and do full filtering:
} else if (state.index.isLeafNode()) {
// TODO: we can assert that the first value here in fact matches what the index claimed?
// In the unbalanced case it's possible the left most node only has one child:
if (state.index.nodeExists()) {
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, state.index.getLeafBlockFP(), state.scratchIterator);
// Again, this time reading values and checking with the visitor
visitDocValues(
state.commonPrefixLengths,
state.scratchDataPackedValue,
state.scratchMinIndexPackedValue,
state.scratchMaxIndexPackedValue,
state.in,
state.scratchIterator,
count,
state.visitor);
}
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == config.bytesPerDim;
// System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert Arrays.compareUnsigned(
cellMinPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
<= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
assert Arrays.compareUnsigned(
cellMaxPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
>= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushLeft();
intersect(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(
splitPackedValue,
splitDim * config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushRight();
intersect(state, splitPackedValue, cellMaxPacked);
state.index.pop();
}
}
private long estimatePointCount(
IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
return 0L;
} else if (r == Relation.CELL_INSIDE_QUERY) {
return (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
} else if (state.index.isLeafNode()) {
// Assume half the points matched
return (config.maxPointsInLeafNode + 1) / 2;
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims
: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == config.bytesPerDim;
// System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert Arrays.compareUnsigned(
cellMinPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
<= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
assert Arrays.compareUnsigned(
cellMaxPacked,
splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
splitDimValue.offset + config.bytesPerDim)
>= 0
: "config.bytesPerDim="
+ config.bytesPerDim
+ " splitDim="
+ splitDim
+ " config.numIndexDims="
+ config.numIndexDims
+ " config.numDims="
+ config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushLeft();
final long leftCost = estimatePointCount(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(
splitPackedValue,
splitDim * config.bytesPerDim,
splitDimValue.bytes,
splitDimValue.offset,
config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(
splitDimValue.bytes,
splitDimValue.offset,
splitPackedValue,
splitDim * config.bytesPerDim,
config.bytesPerDim);
state.index.pushRight();
final long rightCost = estimatePointCount(state, splitPackedValue, cellMaxPacked);
state.index.pop();
return leftCost + rightCost;
}
}
@Override
public byte[] getMinPackedValue() {
return minPackedValue.clone();
}
@Override
public byte[] getMaxPackedValue() {
return maxPackedValue.clone();
}
@Override
public int getNumDimensions() {
return config.numDims;
}
@Override
public int getNumIndexDimensions() {
return config.numIndexDims;
}
@Override
public int getBytesPerDimension() {
return config.bytesPerDim;
}
@Override
public long size() {
return pointCount;
}
@Override
public int getDocCount() {
return docCount;
}
public boolean isLeafNode(int nodeID) {
return nodeID >= leafNodeOffset;
}
/** Reusable {@link DocIdSetIterator} to handle low cardinality leaves. */
protected static class BKDReaderDocIDSetIterator extends DocIdSetIterator {
private int idx;
private int length;
private int offset;
private int docID;
final int[] docIDs;
public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) {
this.docIDs = new int[maxPointsInLeafNode];
}
@Override
public int docID() {
return docID;
}
private void reset(int offset, int length) {
this.offset = offset;
this.length = length;
assert offset + length <= docIDs.length;
this.docID = -1;
this.idx = 0;
}
@Override
public int nextDoc() throws IOException {
if (idx == length) {
docID = DocIdSetIterator.NO_MORE_DOCS;
} else {
docID = docIDs[offset + idx];
idx++;
}
return docID;
}
@Override
public int advance(int target) throws IOException {
return slowAdvance(target);
}
@Override
public long cost() {
return length;
}
}
}