blob: f3692af449e33fe1ec31f71e13b893935e043acf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.lucene60;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene84.Lucene84RWCodec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.BasePointsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDConfig;
/** Tests Lucene60PointsFormat */
public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
private final Codec codec;
private final int maxPointsInLeafNode;
public TestLucene60PointsFormat() {
codec = new Lucene84RWCodec();
maxPointsInLeafNode = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
}
@Override
protected Codec getCodec() {
return codec;
}
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// Avoid mockRandomMP since it may cause non-optimal merges that make the
// number of points per leaf hard to predict
while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
iwc.setMergePolicy(newMergePolicy());
}
IndexWriter w = new IndexWriter(dir, iwc);
byte[] pointValue = new byte[3];
byte[] uniquePointValue = new byte[3];
random().nextBytes(uniquePointValue);
final int numDocs =
TEST_NIGHTLY ? atLeast(10000) : atLeast(500); // at night, make sure we have several leaves
final boolean multiValues = random().nextBoolean();
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
final int numValues = (multiValues) ? TestUtil.nextInt(random(), 2, 100) : 1;
for (int j = 0; j < numValues; j++) {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
doc.add(new BinaryPoint("f", pointValue));
}
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = (int) Math.ceil((double) points.size() / maxPointsInLeafNode);
IntersectVisitor allPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
};
assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(allPointsVisitor));
assertEquals(numDocs, points.estimateDocCount(allPointsVisitor));
IntersectVisitor noPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
};
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(noPointsVisitor));
assertEquals(0, points.estimateDocCount(noPointsVisitor));
IntersectVisitor onePointMatchVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (Arrays.compareUnsigned(uniquePointValue, 0, 3, maxPackedValue, 0, 3) > 0
|| Arrays.compareUnsigned(uniquePointValue, 0, 3, minPackedValue, 0, 3) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
};
// If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
// in general, or maybe 2x that if the point is a split value
final long pointCount = points.estimatePointCount(onePointMatchVisitor);
assertTrue(
"" + pointCount,
pointCount == (maxPointsInLeafNode + 1) / 2
|| // common case
pointCount == 2 * ((maxPointsInLeafNode + 1) / 2)); // if the point is a split value
final long docCount = points.estimateDocCount(onePointMatchVisitor);
if (multiValues) {
assertEquals(
docCount,
(long)
(docCount
* (1d
- Math.pow(
(numDocs - pointCount) / points.size(), points.size() / docCount))));
} else {
assertEquals(Math.min(pointCount, numDocs), docCount);
}
r.close();
dir.close();
}
// The tree is always balanced in the N dims case, and leaves are
// not all full so things are a bit different
public void testEstimatePointCount2Dims() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
byte[][] pointValue = new byte[2][];
pointValue[0] = new byte[3];
pointValue[1] = new byte[3];
byte[][] uniquePointValue = new byte[2][];
uniquePointValue[0] = new byte[3];
uniquePointValue[1] = new byte[3];
random().nextBytes(uniquePointValue[0]);
random().nextBytes(uniquePointValue[1]);
final int numDocs =
TEST_NIGHTLY
? atLeast(10000)
: atLeast(1000); // in nightly, make sure we have several leaves
final boolean multiValues = random().nextBoolean();
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
final int numValues = (multiValues) ? TestUtil.nextInt(random(), 2, 100) : 1;
for (int j = 0; j < numValues; j++) {
do {
random().nextBytes(pointValue[0]);
random().nextBytes(pointValue[1]);
} while (Arrays.equals(pointValue[0], uniquePointValue[0])
|| Arrays.equals(pointValue[1], uniquePointValue[1]));
doc.add(new BinaryPoint("f", pointValue));
}
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
IntersectVisitor allPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
};
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = (int) Math.ceil((double) points.size() / maxPointsInLeafNode);
assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(allPointsVisitor));
assertEquals(numDocs, points.estimateDocCount(allPointsVisitor));
IntersectVisitor noPointsVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
};
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(noPointsVisitor));
assertEquals(0, points.estimateDocCount(noPointsVisitor));
IntersectVisitor onePointMatchVisitor =
new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {}
@Override
public void visit(int docID) throws IOException {}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
for (int dim = 0; dim < 2; ++dim) {
if (Arrays.compareUnsigned(
uniquePointValue[dim], 0, 3, maxPackedValue, dim * 3, dim * 3 + 3)
> 0
|| Arrays.compareUnsigned(
uniquePointValue[dim], 0, 3, minPackedValue, dim * 3, dim * 3 + 3)
< 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
}
return Relation.CELL_CROSSES_QUERY;
}
};
final long pointCount = points.estimatePointCount(onePointMatchVisitor);
// The number of matches needs to be multiple of count per leaf
final long countPerLeaf = (maxPointsInLeafNode + 1) / 2;
assertTrue("" + pointCount, pointCount % countPerLeaf == 0);
// in extreme cases, a point can be be shared by 4 leaves
assertTrue("" + pointCount, pointCount / countPerLeaf <= 4 && pointCount / countPerLeaf >= 1);
final long docCount = points.estimateDocCount(onePointMatchVisitor);
if (multiValues) {
assertEquals(
docCount,
(long)
(docCount
* (1d
- Math.pow(
(numDocs - pointCount) / points.size(), points.size() / docCount))));
} else {
assertEquals(Math.min(pointCount, numDocs), docCount);
}
r.close();
dir.close();
}
public void testDocCountEdgeCases() {
PointValues values = getPointValues(Long.MAX_VALUE, 1, Long.MAX_VALUE);
long docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, 1, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE / 2);
docs = values.estimateDocCount(null);
assertEquals(Integer.MAX_VALUE, docs);
values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, 1);
docs = values.estimateDocCount(null);
assertEquals(1, docs);
}
public void testRandomDocCount() {
for (int i = 0; i < 100; i++) {
long size = TestUtil.nextLong(random(), 1, Long.MAX_VALUE);
int maxDoc = (size > Integer.MAX_VALUE) ? Integer.MAX_VALUE : Math.toIntExact(size);
int docCount = TestUtil.nextInt(random(), 1, maxDoc);
long estimatedPointCount = TestUtil.nextLong(random(), 0, size);
PointValues values = getPointValues(size, docCount, estimatedPointCount);
long docs = values.estimateDocCount(null);
assertTrue(docs <= estimatedPointCount);
assertTrue(docs <= maxDoc);
assertTrue(docs >= estimatedPointCount / (size / docCount));
}
}
private PointValues getPointValues(long size, int docCount, long estimatedPointCount) {
return new PointValues() {
@Override
public void intersect(IntersectVisitor visitor) {
throw new UnsupportedOperationException();
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatedPointCount;
}
@Override
public byte[] getMinPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public byte[] getMaxPackedValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getNumIndexDimensions() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int getBytesPerDimension() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long size() {
return size;
}
@Override
public int getDocCount() {
return docCount;
}
};
}
}