blob: d37284aae4b3bf49b9c11ff464adb4a2c04e0407 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Random;
import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.mockfile.ExtrasFS;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.CorruptingIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FutureArrays;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
public class TestBKD extends LuceneTestCase {
public void testBasicInts1D() throws Exception {
try (Directory dir = getDirectory(100)) {
BKDWriter w = new BKDWriter(100, dir, "tmp", new BKDConfig(1, 1, 4, 2), 1.0f, 100);
byte[] scratch = new byte[4];
for(int docID=0;docID<100;docID++) {
NumericUtils.intToSortableBytes(docID, scratch, 0);
w.add(scratch, docID);
}
long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
Runnable finalizer = w.finish(out, out, out);
indexFP = out.getFilePointer();
finalizer.run();
}
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in);
// Simple 1D range query:
final int queryMin = 42;
final int queryMax = 87;
final BitSet hits = new BitSet();
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
hits.set(docID);
if (VERBOSE) {
System.out.println("visit docID=" + docID);
}
}
@Override
public void visit(int docID, byte[] packedValue) {
int x = NumericUtils.sortableBytesToInt(packedValue, 0);
if (VERBOSE) {
System.out.println("visit docID=" + docID + " x=" + x);
}
if (x >= queryMin && x <= queryMax) {
hits.set(docID);
}
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
int min = NumericUtils.sortableBytesToInt(minPacked, 0);
int max = NumericUtils.sortableBytesToInt(maxPacked, 0);
assert max >= min;
if (VERBOSE) {
System.out.println("compare: min=" + min + " max=" + max + " vs queryMin=" + queryMin + " queryMax=" + queryMax);
}
if (max < queryMin || min > queryMax) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (min >= queryMin && max <= queryMax) {
return Relation.CELL_INSIDE_QUERY;
} else {
return Relation.CELL_CROSSES_QUERY;
}
}
});
for(int docID=0;docID<100;docID++) {
boolean expected = docID >= queryMin && docID <= queryMax;
boolean actual = hits.get(docID);
assertEquals("docID=" + docID, expected, actual);
}
}
}
}
public void testRandomIntsNDims() throws Exception {
int numDocs = atLeast(1000);
try (Directory dir = getDirectory(numDocs)) {
int numDims = TestUtil.nextInt(random(), 1, 5);
int numIndexDims = TestUtil.nextInt(random(), 1, numDims);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat());
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", new BKDConfig(numDims, numIndexDims, 4, maxPointsInLeafNode), maxMB, numDocs);
if (VERBOSE) {
System.out.println("TEST: numDims=" + numDims + " numIndexDims=" + numIndexDims + " numDocs=" + numDocs);
}
int[][] docs = new int[numDocs][];
byte[] scratch = new byte[4*numDims];
int[] minValue = new int[numDims];
int[] maxValue = new int[numDims];
Arrays.fill(minValue, Integer.MAX_VALUE);
Arrays.fill(maxValue, Integer.MIN_VALUE);
for(int docID=0;docID<numDocs;docID++) {
int[] values = new int[numDims];
if (VERBOSE) {
System.out.println(" docID=" + docID);
}
for(int dim=0;dim<numDims;dim++) {
values[dim] = random().nextInt();
if (values[dim] < minValue[dim]) {
minValue[dim] = values[dim];
}
if (values[dim] > maxValue[dim]) {
maxValue[dim] = values[dim];
}
NumericUtils.intToSortableBytes(values[dim], scratch, dim * Integer.BYTES);
if (VERBOSE) {
System.out.println(" " + dim + " -> " + values[dim]);
}
}
docs[docID] = values;
w.add(scratch, docID);
}
long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
Runnable finalizer = w.finish(out, out, out);
indexFP = out.getFilePointer();
finalizer.run();
}
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in);
byte[] minPackedValue = r.getMinPackedValue();
byte[] maxPackedValue = r.getMaxPackedValue();
for(int dim=0;dim<numIndexDims;dim++) {
assertEquals(minValue[dim], NumericUtils.sortableBytesToInt(minPackedValue, dim * Integer.BYTES));
assertEquals(maxValue[dim], NumericUtils.sortableBytesToInt(maxPackedValue, dim * Integer.BYTES));
}
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
// Random N dims rect query:
int[] queryMin = new int[numDims];
int[] queryMax = new int[numDims];
for(int dim=0;dim<numIndexDims;dim++) {
queryMin[dim] = random().nextInt();
queryMax[dim] = random().nextInt();
if (queryMin[dim] > queryMax[dim]) {
int x = queryMin[dim];
queryMin[dim] = queryMax[dim];
queryMax[dim] = x;
}
}
final BitSet hits = new BitSet();
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
hits.set(docID);
//System.out.println("visit docID=" + docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
//System.out.println("visit check docID=" + docID);
for(int dim=0;dim<numIndexDims;dim++) {
int x = NumericUtils.sortableBytesToInt(packedValue, dim * Integer.BYTES);
if (x < queryMin[dim] || x > queryMax[dim]) {
//System.out.println(" no");
return;
}
}
//System.out.println(" yes");
hits.set(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
boolean crosses = false;
for(int dim=0;dim<numIndexDims;dim++) {
int min = NumericUtils.sortableBytesToInt(minPacked, dim * Integer.BYTES);
int max = NumericUtils.sortableBytesToInt(maxPacked, dim * Integer.BYTES);
assert max >= min;
if (max < queryMin[dim] || min > queryMax[dim]) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (min < queryMin[dim] || max > queryMax[dim]) {
crosses = true;
}
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
for(int docID=0;docID<numDocs;docID++) {
int[] docValues = docs[docID];
boolean expected = true;
for(int dim=0;dim<numIndexDims;dim++) {
int x = docValues[dim];
if (x < queryMin[dim] || x > queryMax[dim]) {
expected = false;
break;
}
}
boolean actual = hits.get(docID);
assertEquals("docID=" + docID, expected, actual);
}
}
}
}
}
// Tests on N-dimensional points where each dimension is a BigInteger
public void testBigIntNDims() throws Exception {
int numDocs = atLeast(1000);
try (Directory dir = getDirectory(numDocs)) {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDims = TestUtil.nextInt(random(), 1, 5);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat());
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", new BKDConfig(numDims, numDims, numBytesPerDim, maxPointsInLeafNode), maxMB, numDocs);
BigInteger[][] docs = new BigInteger[numDocs][];
byte[] scratch = new byte[numBytesPerDim*numDims];
for(int docID=0;docID<numDocs;docID++) {
BigInteger[] values = new BigInteger[numDims];
if (VERBOSE) {
System.out.println(" docID=" + docID);
}
for(int dim=0;dim<numDims;dim++) {
values[dim] = randomBigInt(numBytesPerDim);
NumericUtils.bigIntToSortableBytes(values[dim], numBytesPerDim, scratch, dim * numBytesPerDim);
if (VERBOSE) {
System.out.println(" " + dim + " -> " + values[dim]);
}
}
docs[docID] = values;
w.add(scratch, docID);
}
long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
Runnable finalizer = w.finish(out, out, out);
indexFP = out.getFilePointer();
finalizer.run();
}
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in);
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
// Random N dims rect query:
BigInteger[] queryMin = new BigInteger[numDims];
BigInteger[] queryMax = new BigInteger[numDims];
for(int dim=0;dim<numDims;dim++) {
queryMin[dim] = randomBigInt(numBytesPerDim);
queryMax[dim] = randomBigInt(numBytesPerDim);
if (queryMin[dim].compareTo(queryMax[dim]) > 0) {
BigInteger x = queryMin[dim];
queryMin[dim] = queryMax[dim];
queryMax[dim] = x;
}
}
final BitSet hits = new BitSet();
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
hits.set(docID);
//System.out.println("visit docID=" + docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
//System.out.println("visit check docID=" + docID);
for(int dim=0;dim<numDims;dim++) {
BigInteger x = NumericUtils.sortableBytesToBigInt(packedValue, dim * numBytesPerDim, numBytesPerDim);
if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
//System.out.println(" no");
return;
}
}
//System.out.println(" yes");
hits.set(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
BigInteger min = NumericUtils.sortableBytesToBigInt(minPacked, dim * numBytesPerDim, numBytesPerDim);
BigInteger max = NumericUtils.sortableBytesToBigInt(maxPacked, dim * numBytesPerDim, numBytesPerDim);
assert max.compareTo(min) >= 0;
if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
crosses = true;
}
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
for(int docID=0;docID<numDocs;docID++) {
BigInteger[] docValues = docs[docID];
boolean expected = true;
for(int dim=0;dim<numDims;dim++) {
BigInteger x = docValues[dim];
if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
expected = false;
break;
}
}
boolean actual = hits.get(docID);
assertEquals("docID=" + docID, expected, actual);
}
}
}
}
}
/** Make sure we close open files, delete temp files, etc., on exception */
public void testWithExceptions() throws Exception {
int numDocs = atLeast(10000);
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
byte[][][] docValues = new byte[numDocs][][];
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
docValues[docID] = values;
}
double maxMBHeap = 0.05;
// Keep retrying until we 1) we allow a big enough heap, and 2) we hit a random IOExc from MDW:
boolean done = false;
while (done == false) {
MockDirectoryWrapper dir = newMockFSDirectory(createTempDir());
try {
dir.setRandomIOExceptionRate(0.05);
dir.setRandomIOExceptionRateOnOpen(0.05);
verify(dir, docValues, null, numDataDims, numIndexDims, numBytesPerDim, 50, maxMBHeap);
} catch (IllegalArgumentException iae) {
// This just means we got a too-small maxMB for the maxPointsInLeafNode; just retry w/ more heap
assertTrue(iae.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
maxMBHeap *= 1.25;
} catch (IOException ioe) {
if (ioe.getMessage().contains("a random IOException")) {
// BKDWriter should fully clean up after itself:
done = true;
} else {
throw ioe;
}
}
String[] files = Arrays.stream(dir.listAll())
.filter(file -> !ExtrasFS.isExtra(file))
.toArray(String[]::new);
assertTrue("files=" + Arrays.toString(files), files.length == 0);
dir.close();
}
}
public void testRandomBinaryTiny() throws Exception {
doTestRandomBinary(10);
}
public void testRandomBinaryMedium() throws Exception {
doTestRandomBinary(10000);
}
@Nightly
public void testRandomBinaryBig() throws Exception {
doTestRandomBinary(200000);
}
public void testTooLittleHeap() throws Exception {
try (Directory dir = getDirectory(0)) {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new BKDWriter(1, dir, "bkd", new BKDConfig(1, 1, 16, 1000000), 0.001, 0);
});
assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
}
}
private void doTestRandomBinary(int count) throws Exception {
int numDocs = TestUtil.nextInt(random(), count, count*2);
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
byte[][][] docValues = new byte[numDocs][][];
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
docValues[docID] = values;
}
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
}
public void testAllEqual() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(1000);
byte[][][] docValues = new byte[numDocs][][];
for(int docID=0;docID<numDocs;docID++) {
if (docID == 0) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
docValues[docID] = values;
} else {
docValues[docID] = docValues[0];
}
}
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
}
public void testIndexDimEqualDataDimDifferent() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 2, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims - 1), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(1000);
byte[][][] docValues = new byte[numDocs][][];
byte[][] indexDimensions = new byte[numDataDims][];
for(int dim=0;dim<numIndexDims;dim++) {
indexDimensions[dim] = new byte[numBytesPerDim];
random().nextBytes(indexDimensions[dim]);
}
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numIndexDims;dim++) {
values[dim] = indexDimensions[dim];
}
for (int dim = numIndexDims; dim < numDataDims; dim++) {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
docValues[docID] = values;
}
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
}
public void testOneDimEqual() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(1000);
int theEqualDim = random().nextInt(numDataDims);
byte[][][] docValues = new byte[numDocs][][];
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
docValues[docID] = values;
if (docID > 0) {
docValues[docID][theEqualDim] = docValues[0][theEqualDim];
}
}
// Use a small number of points in leaf blocks to trigger a lot of splitting
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim, TestUtil.nextInt(random(), 20, 50));
}
// This triggers the logic that makes sure all dimensions get indexed
// by looking at how many times each dim has been split
public void testOneDimLowCard() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 2, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 2, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(10000);
int theLowCardDim = random().nextInt(numDataDims);
byte[] value1 = new byte[numBytesPerDim];
random().nextBytes(value1);
byte[] value2 = value1.clone();
if (value2[numBytesPerDim-1] == 0 || random().nextBoolean()) {
value2[numBytesPerDim-1]++;
} else {
value2[numBytesPerDim-1]--;
}
byte[][][] docValues = new byte[numDocs][][];
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
if (dim == theLowCardDim) {
values[dim] = random().nextBoolean() ? value1 : value2;
} else {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
}
docValues[docID] = values;
}
// Use a small number of points in leaf blocks to trigger a lot of splitting
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim, TestUtil.nextInt(random(), 20, 50));
}
// this should trigger run-length compression with lengths that are greater than 255
public void testOneDimTwoValues() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(1000);
int theDim = random().nextInt(numDataDims);
byte[] value1 = new byte[numBytesPerDim];
random().nextBytes(value1);
byte[] value2 = new byte[numBytesPerDim];
random().nextBytes(value2);
byte[][][] docValues = new byte[numDocs][][];
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
if (dim == theDim) {
values[dim] = random().nextBoolean() ? value1 : value2;
} else {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
}
docValues[docID] = values;
}
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
}
// this should trigger low cardinality leaves
public void testRandomFewDifferentValues() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(10000);
int cardinality = TestUtil.nextInt(random(), 2, 100);
byte[][][] values = new byte[cardinality][numDataDims][numBytesPerDim];
for (int i = 0; i < cardinality; i++) {
for (int j = 0; j < numDataDims; j++) {
random().nextBytes(values[i][j]);
}
}
byte[][][] docValues = new byte[numDocs][][];
for(int docID = 0; docID < numDocs; docID++) {
docValues[docID] = values[random().nextInt(cardinality)];
}
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
}
public void testMultiValued() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numIndexDims = Math.min(TestUtil.nextInt(random(), 1, numDataDims), PointValues.MAX_INDEX_DIMENSIONS);
int numDocs = atLeast(1000);
List<byte[][]> docValues = new ArrayList<>();
List<Integer> docIDs = new ArrayList<>();
for(int docID=0;docID<numDocs;docID++) {
int numValuesInDoc = TestUtil.nextInt(random(), 1, 5);
for(int ord=0;ord<numValuesInDoc;ord++) {
docIDs.add(docID);
byte[][] values = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
values[dim] = new byte[numBytesPerDim];
random().nextBytes(values[dim]);
}
docValues.add(values);
}
}
byte[][][] docValuesArray = docValues.toArray(new byte[docValues.size()][][]);
int[] docIDsArray = new int[docIDs.size()];
for(int i=0;i<docIDsArray.length;i++) {
docIDsArray[i] = docIDs.get(i);
}
verify(docValuesArray, docIDsArray, numDataDims, numIndexDims, numBytesPerDim);
}
/** docIDs can be null, for the single valued case, else it maps value to docID */
private void verify(byte[][][] docValues, int[] docIDs, int numDataDims, int numIndexDims, int numBytesPerDim) throws Exception {
verify(docValues, docIDs, numDataDims, numIndexDims, numBytesPerDim, TestUtil.nextInt(random(), 50, 1000));
}
private void verify(byte[][][] docValues, int[] docIDs, int numDataDims, int numIndexDims, int numBytesPerDim,
int maxPointsInLeafNode) throws Exception {
try (Directory dir = getDirectory(docValues.length)) {
double maxMB = (float) 3.0 + (3*random().nextDouble());
verify(dir, docValues, docIDs, numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
}
}
private void verify(Directory dir, byte[][][] docValues, int[] docIDs, int numDataDims, int numIndexDims, int numBytesPerDim, int maxPointsInLeafNode, double maxMB) throws Exception {
int numValues = docValues.length;
if (VERBOSE) {
System.out.println("TEST: numValues=" + numValues + " numDataDims=" + numDataDims + " numIndexDims=" + numIndexDims + " numBytesPerDim=" + numBytesPerDim + " maxPointsInLeafNode=" + maxPointsInLeafNode + " maxMB=" + maxMB);
}
List<Long> toMerge = null;
List<MergeState.DocMap> docMaps = null;
int seg = 0;
//we force sometimes to provide a bigger point count
long maxDocs = Long.MIN_VALUE;
if (random().nextBoolean()) {
maxDocs = docValues.length;
} else {
while (maxDocs < docValues.length) {
maxDocs = random().nextLong();
}
}
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode), maxMB, maxDocs);
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
IndexInput in = null;
boolean success = false;
try {
byte[] scratch = new byte[numBytesPerDim*numDataDims];
int lastDocIDBase = 0;
boolean useMerge = numDataDims == 1 && numValues >= 10 && random().nextBoolean();
int valuesInThisSeg;
if (useMerge) {
// Sometimes we will call merge with a single segment:
valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues);
} else {
valuesInThisSeg = 0;
}
int segCount = 0;
for(int ord=0;ord<numValues;ord++) {
int docID;
if (docIDs == null) {
docID = ord;
} else {
docID = docIDs[ord];
}
if (VERBOSE) {
System.out.println(" ord=" + ord + " docID=" + docID + " lastDocIDBase=" + lastDocIDBase);
}
for(int dim=0;dim<numDataDims;dim++) {
if (VERBOSE) {
System.out.println(" " + dim + " -> " + new BytesRef(docValues[ord][dim]));
}
System.arraycopy(docValues[ord][dim], 0, scratch, dim*numBytesPerDim, numBytesPerDim);
}
w.add(scratch, docID-lastDocIDBase);
segCount++;
if (useMerge && segCount == valuesInThisSeg) {
if (toMerge == null) {
toMerge = new ArrayList<>();
docMaps = new ArrayList<>();
}
final int curDocIDBase = lastDocIDBase;
docMaps.add(new MergeState.DocMap() {
@Override
public int get(int docID) {
return curDocIDBase + docID;
}
});
Runnable finalizer = w.finish(out, out, out);
toMerge.add(out.getFilePointer());
finalizer.run();
valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues/2);
segCount = 0;
seg++;
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
maxMB = (float) 3.0 + (3*random().nextDouble());
w = new BKDWriter(numValues, dir, "_" + seg, new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode), maxMB, docValues.length);
lastDocIDBase = docID;
}
}
long indexFP;
if (toMerge != null) {
if (segCount > 0) {
Runnable finalizer = w.finish(out, out, out);
toMerge.add(out.getFilePointer());
finalizer.run();
final int curDocIDBase = lastDocIDBase;
docMaps.add(new MergeState.DocMap() {
@Override
public int get(int docID) {
return curDocIDBase + docID;
}
});
}
out.close();
in = dir.openInput("bkd", IOContext.DEFAULT);
seg++;
w = new BKDWriter(numValues, dir, "_" + seg, new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode), maxMB, docValues.length);
List<BKDReader> readers = new ArrayList<>();
for(long fp : toMerge) {
in.seek(fp);
readers.add(new BKDReader(in, in, in));
}
out = dir.createOutput("bkd2", IOContext.DEFAULT);
Runnable finalizer = w.merge(out, out, out, docMaps, readers);
indexFP = out.getFilePointer();
finalizer.run();
out.close();
in.close();
in = dir.openInput("bkd2", IOContext.DEFAULT);
} else {
Runnable finalizer = w.finish(out, out, out);
indexFP = out.getFilePointer();
finalizer.run();
out.close();
in = dir.openInput("bkd", IOContext.DEFAULT);
}
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in);
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
// Random N dims rect query:
byte[][] queryMin = new byte[numDataDims][];
byte[][] queryMax = new byte[numDataDims][];
for(int dim=0;dim<numDataDims;dim++) {
queryMin[dim] = new byte[numBytesPerDim];
random().nextBytes(queryMin[dim]);
queryMax[dim] = new byte[numBytesPerDim];
random().nextBytes(queryMax[dim]);
if (FutureArrays.compareUnsigned(queryMin[dim], 0, numBytesPerDim, queryMax[dim], 0, numBytesPerDim) > 0) {
byte[] x = queryMin[dim];
queryMin[dim] = queryMax[dim];
queryMax[dim] = x;
}
}
final BitSet hits = new BitSet();
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
hits.set(docID);
//System.out.println("visit docID=" + docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
//System.out.println("visit check docID=" + docID);
for(int dim=0;dim<numIndexDims;dim++) {
if (FutureArrays.compareUnsigned(packedValue, dim * numBytesPerDim, dim * numBytesPerDim + numBytesPerDim, queryMin[dim], 0, numBytesPerDim) < 0 ||
FutureArrays.compareUnsigned(packedValue, dim * numBytesPerDim, dim * numBytesPerDim + numBytesPerDim, queryMax[dim], 0, numBytesPerDim) > 0) {
//System.out.println(" no");
return;
}
}
//System.out.println(" yes");
hits.set(docID);
}
@Override
public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
if (random().nextBoolean()) {
// check the default method is correct
IntersectVisitor.super.visit(iterator, packedValue);
} else {
assertEquals(iterator.docID(), -1);
int cost = Math.toIntExact(iterator.cost());
int numberOfPoints = 0;
int docID;
while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
assertEquals(iterator.docID(), docID);
visit(docID, packedValue);
numberOfPoints++;
}
assertEquals(cost, numberOfPoints);
assertEquals(iterator.docID(), DocIdSetIterator.NO_MORE_DOCS);
assertEquals(iterator.nextDoc(), DocIdSetIterator.NO_MORE_DOCS);
assertEquals(iterator.docID(), DocIdSetIterator.NO_MORE_DOCS);
}
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
boolean crosses = false;
for(int dim=0;dim<numIndexDims;dim++) {
if (FutureArrays.compareUnsigned(maxPacked, dim * numBytesPerDim, dim * numBytesPerDim + numBytesPerDim, queryMin[dim], 0, numBytesPerDim) < 0 ||
FutureArrays.compareUnsigned(minPacked, dim * numBytesPerDim, dim * numBytesPerDim + numBytesPerDim, queryMax[dim], 0, numBytesPerDim) > 0) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (FutureArrays.compareUnsigned(minPacked, dim * numBytesPerDim, dim * numBytesPerDim + numBytesPerDim, queryMin[dim], 0, numBytesPerDim) < 0 ||
FutureArrays.compareUnsigned(maxPacked, dim * numBytesPerDim, dim * numBytesPerDim + numBytesPerDim, queryMax[dim], 0, numBytesPerDim) > 0) {
crosses = true;
}
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
BitSet expected = new BitSet();
for(int ord=0;ord<numValues;ord++) {
boolean matches = true;
for(int dim=0;dim<numIndexDims;dim++) {
byte[] x = docValues[ord][dim];
if (FutureArrays.compareUnsigned(x, 0, numBytesPerDim, queryMin[dim], 0, numBytesPerDim) < 0 ||
FutureArrays.compareUnsigned(x, 0, numBytesPerDim, queryMax[dim], 0, numBytesPerDim) > 0) {
matches = false;
break;
}
}
if (matches) {
int docID;
if (docIDs == null) {
docID = ord;
} else {
docID = docIDs[ord];
}
expected.set(docID);
}
}
int limit = Math.max(expected.length(), hits.length());
for(int docID=0;docID<limit;docID++) {
assertEquals("docID=" + docID, expected.get(docID), hits.get(docID));
}
}
in.close();
dir.deleteFile("bkd");
if (toMerge != null) {
dir.deleteFile("bkd2");
}
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(w, in, out);
IOUtils.deleteFilesIgnoringExceptions(dir, "bkd", "bkd2");
}
}
}
private BigInteger randomBigInt(int numBytes) {
BigInteger x = new BigInteger(numBytes*8-1, random());
if (random().nextBoolean()) {
x = x.negate();
}
return x;
}
private Directory getDirectory(int numPoints) {
Directory dir;
if (numPoints > 100000) {
dir = newFSDirectory(createTempDir("TestBKDTree"));
} else {
dir = newDirectory();
}
return dir;
}
/** Make sure corruption on an input sort file is caught, even if BKDWriter doesn't get angry */
public void testBitFlippedOnPartition1() throws Exception {
// Generate fixed data set:
int numDocs = atLeast(10000);
int numBytesPerDim = 4;
int numDims = 3;
byte[][][] docValues = new byte[numDocs][][];
byte counter = 0;
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDims][];
for(int dim=0;dim<numDims;dim++) {
values[dim] = new byte[numBytesPerDim];
for(int i=0;i<values[dim].length;i++) {
values[dim][i] = counter;
counter++;
}
}
docValues[docID] = values;
}
try (Directory dir0 = newMockDirectory()) {
Directory dir = new FilterDirectory(dir0) {
boolean corrupted;
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
IndexOutput out = in.createTempOutput(prefix, suffix, context);
if (corrupted == false && prefix.equals("_0") && suffix.equals("bkd_left0")) {
corrupted = true;
return new CorruptingIndexOutput(dir0, 22, out);
} else {
return out;
}
}
};
CorruptIndexException e = expectThrows(CorruptIndexException.class, () -> {
verify(dir, docValues, null, numDims, numDims, numBytesPerDim, 50, 0.1);
});
assertTrue(e.getMessage().contains("checksum failed (hardware problem?)"));
}
}
/** Make sure corruption on a recursed partition is caught, when BKDWriter does get angry */
public void testBitFlippedOnPartition2() throws Exception {
// Generate fixed data set:
int numDocs = atLeast(10000);
int numBytesPerDim = 4;
int numDims = 3;
byte[][][] docValues = new byte[numDocs][][];
byte counter = 0;
for(int docID=0;docID<numDocs;docID++) {
byte[][] values = new byte[numDims][];
for(int dim=0;dim<numDims;dim++) {
values[dim] = new byte[numBytesPerDim];
for(int i=0;i<values[dim].length;i++) {
values[dim][i] = counter;
counter++;
}
}
docValues[docID] = values;
}
try (Directory dir0 = newMockDirectory()) {
Directory dir = new FilterDirectory(dir0) {
boolean corrupted;
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
IndexOutput out = in.createTempOutput(prefix, suffix, context);
//System.out.println("prefix=" + prefix + " suffix=" + suffix);
if (corrupted == false && suffix.equals("bkd_left0")) {
//System.out.println("now corrupt byte=" + x + " prefix=" + prefix + " suffix=" + suffix);
corrupted = true;
return new CorruptingIndexOutput(dir0, 22072, out);
} else {
return out;
}
}
};
Throwable t = expectThrows(CorruptIndexException.class, () -> {
verify(dir, docValues, null, numDims, numDims, numBytesPerDim, 50, 0.1);
});
assertCorruptionDetected(t);
}
}
private void assertCorruptionDetected(Throwable t) {
if (t instanceof CorruptIndexException) {
if (t.getMessage().contains("checksum failed (hardware problem?)")) {
return;
}
}
for(Throwable suppressed : t.getSuppressed()) {
if (suppressed instanceof CorruptIndexException) {
if (suppressed.getMessage().contains("checksum failed (hardware problem?)")) {
return;
}
}
}
fail("did not see a suppressed CorruptIndexException");
}
public void testTieBreakOrder() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 10000;
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", new BKDConfig(1, 1, Integer.BYTES, 2), 0.01f, numDocs);
for(int i=0;i<numDocs;i++) {
w.add(new byte[Integer.BYTES], i);
}
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
Runnable finalizer = w.finish(out, out, out);
long fp = out.getFilePointer();
finalizer.run();
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in, in, in);
r.intersect(new IntersectVisitor() {
int lastDocID = -1;
@Override
public void visit(int docID) {
assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
lastDocID = docID;
}
@Override
public void visit(int docID, byte[] packedValue) {
visit(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
return Relation.CELL_CROSSES_QUERY;
}
});
in.close();
}
}
public void testCheckDataDimOptimalOrder() throws IOException {
Directory dir = newDirectory();
final int numValues = atLeast(5000);
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final double maxMB = (float) 3.0 + (3*random().nextDouble());
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final byte[] pointValue1 = new byte[numDataDims * numBytesPerDim];
final byte[] pointValue2 = new byte[numDataDims * numBytesPerDim];
random().nextBytes(pointValue1);
random().nextBytes(pointValue2);
// equal index dimensions but different data dimensions
for (int i = 0; i < numIndexDims; i++) {
System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim);
}
BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode),
maxMB, 2 * numValues);
for (int i = 0; i < numValues; ++i) {
w.add(pointValue1, i);
w.add(pointValue2, i);
}
final long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
Runnable finalizer = w.finish(out, out, out);
indexFP = out.getFilePointer();
finalizer.run();
w.close();
}
IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
pointsIn.seek(indexFP);
BKDReader points = new BKDReader(pointsIn, pointsIn, pointsIn);
points.intersect(new IntersectVisitor() {
byte[] previous = null;
boolean hasChanged = false;
@Override
public void visit(int docID) {
throw new UnsupportedOperationException();
}
@Override
public void visit(int docID, byte[] packedValue) {
if (previous == null) {
previous = new byte[numDataDims * numBytesPerDim];
System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
} else {
int mismatch = FutureArrays.mismatch(packedValue, 0, numDataDims * numBytesPerDim, previous, 0, numDataDims * numBytesPerDim);
if (mismatch != -1) {
if (hasChanged == false) {
hasChanged = true;
System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
} else {
fail("Points are not in optimal order");
}
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
pointsIn.close();
dir.close();
}
public void test2DLongOrdsOffline() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 100000;
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", new BKDConfig(2, 2, Integer.BYTES, 2), 0.01f, numDocs);
byte[] buffer = new byte[2*Integer.BYTES];
for(int i=0;i<numDocs;i++) {
random().nextBytes(buffer);
w.add(buffer, i);
}
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
Runnable finalizer = w.finish(out, out, out);
long fp = out.getFilePointer();
finalizer.run();
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in, in, in);
int[] count = new int[1];
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
count[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
visit(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
if (random().nextInt(7) == 1) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
assertEquals(numDocs, count[0]);
in.close();
}
}
// Claims 16 bytes per dim, but only use the bottom N 1-3 bytes; this would happen e.g. if a user indexes what are actually just short
// values as a LongPoint:
public void testWastedLeadingBytes() throws Exception {
Random random = random();
int numDims = TestUtil.nextInt(random, 1, PointValues.MAX_INDEX_DIMENSIONS);
int numIndexDims = TestUtil.nextInt(random, 1, numDims);
int bytesPerDim = PointValues.MAX_NUM_BYTES;
int bytesUsed = TestUtil.nextInt(random, 1, 3);
Directory dir = newFSDirectory(createTempDir());
int numDocs = atLeast(10000);
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", new BKDConfig(numDims, numIndexDims, bytesPerDim, 32), 1f, numDocs);
byte[] tmp = new byte[bytesUsed];
byte[] buffer = new byte[numDims * bytesPerDim];
for(int i=0;i<numDocs;i++) {
for(int dim=0;dim<numDims;dim++) {
random.nextBytes(tmp);
System.arraycopy(tmp, 0, buffer, dim*bytesPerDim+(bytesPerDim-bytesUsed), tmp.length);
}
w.add(buffer, i);
}
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
Runnable finalizer = w.finish(out, out, out);
long fp = out.getFilePointer();
finalizer.run();
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in, in, in);
int[] count = new int[1];
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
count[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
assert packedValue.length == numDims * bytesPerDim;
visit(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
assert minPacked.length == numIndexDims * bytesPerDim;
assert maxPacked.length == numIndexDims * bytesPerDim;
if (random().nextInt(7) == 1) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
assertEquals(numDocs, count[0]);
in.close();
dir.close();
}
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
final int numValues = atLeast(10000); // make sure to have multiple leaves
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[] pointValue = new byte[numBytesPerDim];
final byte[] uniquePointValue = new byte[numBytesPerDim];
random().nextBytes(uniquePointValue);
BKDWriter w = new BKDWriter(numValues, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, maxPointsInLeafNode),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
for (int i = 0; i < numValues; ++i) {
if (i == numValues / 2) {
w.add(uniquePointValue, i);
} else {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
w.add(pointValue, i);
}
}
final long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
Runnable finalizer = w.finish(out, out, out);
indexFP = out.getFilePointer();
finalizer.run();
w.close();
}
IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
pointsIn.seek(indexFP);
BKDReader points = new BKDReader(pointsIn, pointsIn, pointsIn);
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
int numLeaves = numValues / maxPointsInLeafNode;
if (numValues % maxPointsInLeafNode != 0) {
numLeaves++;
}
assertEquals(numLeaves * maxPointsInLeafNode,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0,
points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
// in general, or maybe 2x that if the point is a split value
final long pointCount = points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (FutureArrays.compareUnsigned(uniquePointValue, 0, numBytesPerDim, maxPackedValue, 0, numBytesPerDim) > 0 ||
FutureArrays.compareUnsigned(uniquePointValue, 0, numBytesPerDim, minPackedValue, 0, numBytesPerDim) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
});
assertTrue(""+pointCount,
pointCount == (maxPointsInLeafNode + 1) / 2 || // common case
pointCount == 2*((maxPointsInLeafNode + 1) / 2)); // if the point is a split value
pointsIn.close();
dir.close();
}
public void testTotalPointCountValidation() throws IOException {
Directory dir = newDirectory();
final int numValues = 10;
final int numPointsAdded = 50; // exceeds totalPointCount
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[] pointValue = new byte[numBytesPerDim];
random().nextBytes(pointValue);
MutablePointValues reader = new MutablePointValues() {
@Override
public void intersect(IntersectVisitor visitor) throws IOException {
for(int i=0;i<numPointsAdded;i++) {
visitor.visit(0, pointValue);
}
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMinPackedValue() {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue() {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions() {
throw new UnsupportedOperationException();
}
@Override
public int getNumIndexDimensions() {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension() {
throw new UnsupportedOperationException();
}
@Override
public long size() {
return numPointsAdded;
}
@Override
public int getDocCount() {
return numPointsAdded;
}
@Override
public void swap(int i, int j) {
// do nothing
}
@Override
public int getDocID(int i) {
return 0;
}
@Override
public void getValue(int i, BytesRef packedValue) {
packedValue.bytes = pointValue;
}
@Override
public byte getByteAt(int i, int k) {
BytesRef b = new BytesRef();
getValue(i, b);
return b.bytes[b.offset + k];
}
@Override
public void save(int i, int j) {
throw new UnsupportedOperationException();
}
@Override
public void restore(int i, int j) {
throw new UnsupportedOperationException();
}
};
BKDWriter w = new BKDWriter(numValues, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
expectThrows(IllegalStateException.class, () -> {
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
w.writeField(out, out, out, "test_field_name", reader);
} finally {
w.close();
dir.close();
}
});
}
public void testTooManyPoints() throws Exception {
Directory dir = newDirectory();
final int numValues = 10;
final int numPointsAdded = 50; // exceeds totalPointCount
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[] pointValue = new byte[numBytesPerDim];
BKDWriter w = new BKDWriter(numValues, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, 2),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
for(int i=0;i<numValues;i++) {
random().nextBytes(pointValue);
w.add(pointValue, i);
}
random().nextBytes(pointValue);
IllegalStateException ex = expectThrows(IllegalStateException.class, () -> { w.add(pointValue, numValues);});
assertEquals("totalPointCount=10 was passed when we were created, but we just hit 11 values", ex.getMessage());
w.close();
dir.close();
}
public void testTooManyPoints1D() throws Exception {
Directory dir = newDirectory();
final int numValues = 10;
final int numPointsAdded = 50; // exceeds totalPointCount
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[][] pointValue = new byte[11][numBytesPerDim];
BKDWriter w = new BKDWriter(numValues + 1, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, 2),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
for(int i=0;i<numValues + 1;i++) {
random().nextBytes(pointValue[i]);
}
MutablePointValues val = new MutablePointValues() {
@Override
public void getValue(int i, BytesRef packedValue) {
packedValue.bytes = pointValue[i];
packedValue.offset = 0;
packedValue.length = numBytesPerDim;
}
@Override
public byte getByteAt(int i, int k) {
return pointValue[i][k];
}
@Override
public int getDocID(int i) {
return i;
}
@Override
public void swap(int i, int j) {
byte[] temp = pointValue[i];
pointValue[i] = pointValue[j];
pointValue[j] = temp;
}
@Override
public void intersect(IntersectVisitor visitor) throws IOException {
for (int i = 0; i < size(); i++) {
visitor.visit(i, pointValue[i]);
}
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return 11;
}
@Override
public byte[] getMinPackedValue() {
return new byte[numBytesPerDim];
}
@Override
public byte[] getMaxPackedValue() {
return new byte[numBytesPerDim];
}
@Override
public int getNumDimensions() {
return 1;
}
@Override
public int getNumIndexDimensions() {
return 1;
}
@Override
public int getBytesPerDimension() {
return numBytesPerDim;
}
@Override
public long size() {
return 11;
}
@Override
public int getDocCount() {
return 11;
}
@Override
public void save(int i, int j) {
throw new UnsupportedOperationException();
}
@Override
public void restore(int i, int j) {
throw new UnsupportedOperationException();
}
};
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
IllegalStateException ex = expectThrows(IllegalStateException.class, () -> { w.writeField(out, out, out, "", val);});
assertEquals("totalPointCount=10 was passed when we were created, but we just hit 11 values", ex.getMessage());
w.close();
}
dir.close();
}
}