Fix incorrect cost after upgradeToBitSet in DocIdSetBuilder (#11939)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1154c5d..88cf30d 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -11,6 +11,9 @@
This addresses a bug that was introduced in 9.2.0 where having many vectors is not handled well
in the vector connections reader.
+* GITHUB#11939: Fix incorrect cost calculation in DocIdSetBuilder after upgradeToBitSet when the doc list is growing.
+ This addresses a bug where the cost of TermRangeQuery, TermInSetQuery and some other queries was significantly underestimated.
+
Improvements
---------------------
* GITHUB#11912, GITHUB#11918: Port generic exception handling from MemorySegmentIndexInput
diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
index 67b3dde..28128af 100644
--- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -162,12 +162,12 @@
* RoaringDocIdSet.Builder}.
*/
public void add(DocIdSetIterator iter) throws IOException {
+ int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
+ BulkAdder adder = grow(cost);
if (bitSet != null) {
bitSet.or(iter);
return;
}
- int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
- BulkAdder adder = grow(cost);
for (int i = 0; i < cost; ++i) {
int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
index 2fa1465..88dbf24 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
@@ -243,6 +243,18 @@
assertTrue(builder.multivalued);
}
+ public void testCostIsCorrectAfterBitsetUpgrade() throws IOException {
+ final int maxDoc = 1000000;
+ DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(maxDoc);
+ // Add (maxDoc >> 6) single-doc iterators: more than DocIdSetBuilder.threshold (maxDoc >> 7)
+ for (int doc = 0; doc < (maxDoc >> 6); ++doc) {
+ docIdSetBuilder.add(DocIdSetIterator.range(doc, doc + 1));
+ }
+ DocIdSet built = docIdSetBuilder.build();
+ assertTrue(built instanceof BitDocIdSet);
+ assertEquals(maxDoc >> 6, built.iterator().cost());
+ }
+
private static class DummyTerms extends Terms {
private final int docCount;