| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.backward_codecs.lucene60; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import org.apache.lucene.backward_codecs.lucene84.Lucene84RWCodec; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.document.BinaryPoint; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.index.BasePointsFormatTestCase; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.MockRandomMergePolicy; |
| import org.apache.lucene.index.PointValues; |
| import org.apache.lucene.index.PointValues.IntersectVisitor; |
| import org.apache.lucene.index.PointValues.Relation; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.TestUtil; |
| import org.apache.lucene.util.bkd.BKDConfig; |
| |
| /** Tests Lucene60PointsFormat */ |
| public class TestLucene60PointsFormat extends BasePointsFormatTestCase { |
| private final Codec codec; |
| private final int maxPointsInLeafNode; |
| |
| public TestLucene60PointsFormat() { |
| codec = new Lucene84RWCodec(); |
| maxPointsInLeafNode = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; |
| } |
| |
| @Override |
| protected Codec getCodec() { |
| return codec; |
| } |
| |
| public void testEstimatePointCount() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = newIndexWriterConfig(); |
| // Avoid mockRandomMP since it may cause non-optimal merges that make the |
| // number of points per leaf hard to predict |
| while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) { |
| iwc.setMergePolicy(newMergePolicy()); |
| } |
| IndexWriter w = new IndexWriter(dir, iwc); |
| byte[] pointValue = new byte[3]; |
| byte[] uniquePointValue = new byte[3]; |
| random().nextBytes(uniquePointValue); |
| final int numDocs = |
| TEST_NIGHTLY ? atLeast(10000) : atLeast(500); // at night, make sure we have several leaves |
| final boolean multiValues = random().nextBoolean(); |
| for (int i = 0; i < numDocs; ++i) { |
| Document doc = new Document(); |
| if (i == numDocs / 2) { |
| doc.add(new BinaryPoint("f", uniquePointValue)); |
| } else { |
| final int numValues = (multiValues) ? TestUtil.nextInt(random(), 2, 100) : 1; |
| for (int j = 0; j < numValues; j++) { |
| do { |
| random().nextBytes(pointValue); |
| } while (Arrays.equals(pointValue, uniquePointValue)); |
| doc.add(new BinaryPoint("f", pointValue)); |
| } |
| } |
| w.addDocument(doc); |
| } |
| w.forceMerge(1); |
| final IndexReader r = DirectoryReader.open(w); |
| w.close(); |
| final LeafReader lr = getOnlyLeafReader(r); |
| PointValues points = lr.getPointValues("f"); |
| |
| // If all points match, then the point count is numLeaves * maxPointsInLeafNode |
| final int numLeaves = (int) Math.ceil((double) points.size() / maxPointsInLeafNode); |
| |
| IntersectVisitor allPointsVisitor = |
| new IntersectVisitor() { |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException {} |
| |
| @Override |
| public void visit(int docID) throws IOException {} |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| return Relation.CELL_INSIDE_QUERY; |
| } |
| }; |
| |
| assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(allPointsVisitor)); |
| assertEquals(numDocs, points.estimateDocCount(allPointsVisitor)); |
| |
| IntersectVisitor noPointsVisitor = |
| new IntersectVisitor() { |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException {} |
| |
| @Override |
| public void visit(int docID) throws IOException {} |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| return Relation.CELL_OUTSIDE_QUERY; |
| } |
| }; |
| |
| // Return 0 if no points match |
| assertEquals(0, points.estimatePointCount(noPointsVisitor)); |
| assertEquals(0, points.estimateDocCount(noPointsVisitor)); |
| |
| IntersectVisitor onePointMatchVisitor = |
| new IntersectVisitor() { |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException {} |
| |
| @Override |
| public void visit(int docID) throws IOException {} |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| if (Arrays.compareUnsigned(uniquePointValue, 0, 3, maxPackedValue, 0, 3) > 0 |
| || Arrays.compareUnsigned(uniquePointValue, 0, 3, minPackedValue, 0, 3) < 0) { |
| return Relation.CELL_OUTSIDE_QUERY; |
| } |
| return Relation.CELL_CROSSES_QUERY; |
| } |
| }; |
| |
| // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2 |
| // in general, or maybe 2x that if the point is a split value |
| final long pointCount = points.estimatePointCount(onePointMatchVisitor); |
| assertTrue( |
| "" + pointCount, |
| pointCount == (maxPointsInLeafNode + 1) / 2 |
| || // common case |
| pointCount == 2 * ((maxPointsInLeafNode + 1) / 2)); // if the point is a split value |
| |
| final long docCount = points.estimateDocCount(onePointMatchVisitor); |
| |
| if (multiValues) { |
| assertEquals( |
| docCount, |
| (long) |
| (docCount |
| * (1d |
| - Math.pow( |
| (numDocs - pointCount) / points.size(), points.size() / docCount)))); |
| } else { |
| assertEquals(Math.min(pointCount, numDocs), docCount); |
| } |
| r.close(); |
| dir.close(); |
| } |
| |
| // The tree is always balanced in the N dims case, and leaves are |
| // not all full so things are a bit different |
| public void testEstimatePointCount2Dims() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); |
| byte[][] pointValue = new byte[2][]; |
| pointValue[0] = new byte[3]; |
| pointValue[1] = new byte[3]; |
| byte[][] uniquePointValue = new byte[2][]; |
| uniquePointValue[0] = new byte[3]; |
| uniquePointValue[1] = new byte[3]; |
| random().nextBytes(uniquePointValue[0]); |
| random().nextBytes(uniquePointValue[1]); |
| final int numDocs = |
| TEST_NIGHTLY |
| ? atLeast(10000) |
| : atLeast(1000); // in nightly, make sure we have several leaves |
| final boolean multiValues = random().nextBoolean(); |
| for (int i = 0; i < numDocs; ++i) { |
| Document doc = new Document(); |
| if (i == numDocs / 2) { |
| doc.add(new BinaryPoint("f", uniquePointValue)); |
| } else { |
| final int numValues = (multiValues) ? TestUtil.nextInt(random(), 2, 100) : 1; |
| for (int j = 0; j < numValues; j++) { |
| do { |
| random().nextBytes(pointValue[0]); |
| random().nextBytes(pointValue[1]); |
| } while (Arrays.equals(pointValue[0], uniquePointValue[0]) |
| || Arrays.equals(pointValue[1], uniquePointValue[1])); |
| doc.add(new BinaryPoint("f", pointValue)); |
| } |
| } |
| w.addDocument(doc); |
| } |
| w.forceMerge(1); |
| final IndexReader r = DirectoryReader.open(w); |
| w.close(); |
| final LeafReader lr = getOnlyLeafReader(r); |
| PointValues points = lr.getPointValues("f"); |
| |
| IntersectVisitor allPointsVisitor = |
| new IntersectVisitor() { |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException {} |
| |
| @Override |
| public void visit(int docID) throws IOException {} |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| return Relation.CELL_INSIDE_QUERY; |
| } |
| }; |
| |
| // If all points match, then the point count is numLeaves * maxPointsInLeafNode |
| final int numLeaves = (int) Math.ceil((double) points.size() / maxPointsInLeafNode); |
| |
| assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(allPointsVisitor)); |
| assertEquals(numDocs, points.estimateDocCount(allPointsVisitor)); |
| |
| IntersectVisitor noPointsVisitor = |
| new IntersectVisitor() { |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException {} |
| |
| @Override |
| public void visit(int docID) throws IOException {} |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| return Relation.CELL_OUTSIDE_QUERY; |
| } |
| }; |
| |
| // Return 0 if no points match |
| assertEquals(0, points.estimatePointCount(noPointsVisitor)); |
| assertEquals(0, points.estimateDocCount(noPointsVisitor)); |
| |
| IntersectVisitor onePointMatchVisitor = |
| new IntersectVisitor() { |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException {} |
| |
| @Override |
| public void visit(int docID) throws IOException {} |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| for (int dim = 0; dim < 2; ++dim) { |
| if (Arrays.compareUnsigned( |
| uniquePointValue[dim], 0, 3, maxPackedValue, dim * 3, dim * 3 + 3) |
| > 0 |
| || Arrays.compareUnsigned( |
| uniquePointValue[dim], 0, 3, minPackedValue, dim * 3, dim * 3 + 3) |
| < 0) { |
| return Relation.CELL_OUTSIDE_QUERY; |
| } |
| } |
| return Relation.CELL_CROSSES_QUERY; |
| } |
| }; |
| |
| final long pointCount = points.estimatePointCount(onePointMatchVisitor); |
| // The number of matches needs to be multiple of count per leaf |
| final long countPerLeaf = (maxPointsInLeafNode + 1) / 2; |
| assertTrue("" + pointCount, pointCount % countPerLeaf == 0); |
| // in extreme cases, a point can be be shared by 4 leaves |
| assertTrue("" + pointCount, pointCount / countPerLeaf <= 4 && pointCount / countPerLeaf >= 1); |
| |
| final long docCount = points.estimateDocCount(onePointMatchVisitor); |
| if (multiValues) { |
| assertEquals( |
| docCount, |
| (long) |
| (docCount |
| * (1d |
| - Math.pow( |
| (numDocs - pointCount) / points.size(), points.size() / docCount)))); |
| } else { |
| assertEquals(Math.min(pointCount, numDocs), docCount); |
| } |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testDocCountEdgeCases() { |
| PointValues values = getPointValues(Long.MAX_VALUE, 1, Long.MAX_VALUE); |
| long docs = values.estimateDocCount(null); |
| assertEquals(1, docs); |
| values = getPointValues(Long.MAX_VALUE, 1, 1); |
| docs = values.estimateDocCount(null); |
| assertEquals(1, docs); |
| values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE); |
| docs = values.estimateDocCount(null); |
| assertEquals(Integer.MAX_VALUE, docs); |
| values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE / 2); |
| docs = values.estimateDocCount(null); |
| assertEquals(Integer.MAX_VALUE, docs); |
| values = getPointValues(Long.MAX_VALUE, Integer.MAX_VALUE, 1); |
| docs = values.estimateDocCount(null); |
| assertEquals(1, docs); |
| } |
| |
| public void testRandomDocCount() { |
| for (int i = 0; i < 100; i++) { |
| long size = TestUtil.nextLong(random(), 1, Long.MAX_VALUE); |
| int maxDoc = (size > Integer.MAX_VALUE) ? Integer.MAX_VALUE : Math.toIntExact(size); |
| int docCount = TestUtil.nextInt(random(), 1, maxDoc); |
| long estimatedPointCount = TestUtil.nextLong(random(), 0, size); |
| PointValues values = getPointValues(size, docCount, estimatedPointCount); |
| long docs = values.estimateDocCount(null); |
| assertTrue(docs <= estimatedPointCount); |
| assertTrue(docs <= maxDoc); |
| assertTrue(docs >= estimatedPointCount / (size / docCount)); |
| } |
| } |
| |
| private PointValues getPointValues(long size, int docCount, long estimatedPointCount) { |
| return new PointValues() { |
| @Override |
| public void intersect(IntersectVisitor visitor) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long estimatePointCount(IntersectVisitor visitor) { |
| return estimatedPointCount; |
| } |
| |
| @Override |
| public byte[] getMinPackedValue() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public byte[] getMaxPackedValue() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int getNumDimensions() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int getNumIndexDimensions() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int getBytesPerDimension() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long size() { |
| return size; |
| } |
| |
| @Override |
| public int getDocCount() { |
| return docCount; |
| } |
| }; |
| } |
| } |