| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "storage/segment/condition_cache.h" |
| |
| #include <gtest/gtest.h> |
| |
| #include <memory> |
| #include <string> |
| #include <vector> |
| |
| #include "common/status.h" |
| #include "format/generic_reader.h" |
| #include "format/orc/vorc_reader.h" |
| #include "format/parquet/vparquet_reader.h" |
| #include "format/table/transactional_hive_common.h" |
| |
| namespace doris::vectorized { |
| |
// File-wide shorthand for the condition-cache granule size (2048 rows per granule).
constexpr int GS = ConditionCacheContext::GRANULE_SIZE; // 2048
| |
// Fixture for RowGroupReader::filter_ranges_by_cache tests; holds no shared state.
class FilterRangesByCacheTest : public testing::Test {};
| |
| // Single contiguous range, first_row = 0, alternating true/false granules. |
| TEST_F(FilterRangesByCacheTest, SingleRangeAlternatingGranules) { |
| // 4 full granules = 8192 rows, range [0, 8192) |
| RowRanges ranges; |
| ranges.add(RowRange(0, 4 * GS)); |
| // granule 0=true, 1=false, 2=true, 3=false |
| std::vector<bool> cache = {true, false, true, false}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, /*first_row=*/0); |
| |
| // Expect granules 0 and 2 kept: [0, 2048), [4096, 6144) |
| EXPECT_EQ(result.range_size(), 2); |
| EXPECT_EQ(result.count(), 2 * GS); |
| EXPECT_EQ(result.get_range_from(0), 0); |
| EXPECT_EQ(result.get_range_to(0), GS); |
| EXPECT_EQ(result.get_range_from(1), 2 * GS); |
| EXPECT_EQ(result.get_range_to(1), 3 * GS); |
| } |
| |
| // All granules true -> ranges unchanged. |
| TEST_F(FilterRangesByCacheTest, AllGranulesTrue) { |
| RowRanges ranges; |
| ranges.add(RowRange(0, 3 * GS)); |
| std::vector<bool> cache = {true, true, true}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| EXPECT_EQ(result.range_size(), 1); |
| EXPECT_EQ(result.count(), 3 * GS); |
| EXPECT_EQ(result.get_range_from(0), 0); |
| EXPECT_EQ(result.get_range_to(0), 3 * GS); |
| } |
| |
| // All granules false -> empty ranges. |
| TEST_F(FilterRangesByCacheTest, AllGranulesFalse) { |
| RowRanges ranges; |
| ranges.add(RowRange(0, 3 * GS)); |
| std::vector<bool> cache = {false, false, false}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| EXPECT_EQ(result.range_size(), 0); |
| EXPECT_EQ(result.count(), 0); |
| } |
| |
| // first_row offset shifts granule boundaries. |
| TEST_F(FilterRangesByCacheTest, NonZeroFirstRow) { |
| // first_row = 1024, range [0, 4096) -> 4096 rows |
| // Sequential positions 0..4095, global_seq = 1024..5119 |
| // granule 0 (global 0..2047): seq 0..1023 -> range [0, 1024) |
| // granule 1 (global 2048..4095): seq 1024..3071 -> range [1024, 3072) |
| // granule 2 (global 4096..6143): seq 3072..4095 -> range [3072, 4096) |
| RowRanges ranges; |
| ranges.add(RowRange(0, 4096)); |
| // granule 0=false, 1=true, 2=false |
| std::vector<bool> cache = {false, true, false}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, /*first_row=*/1024); |
| |
| // Only granule 1 kept: rows with global_seq in [2048, 4096) -> range [1024, 3072) |
| EXPECT_EQ(result.range_size(), 1); |
| EXPECT_EQ(result.count(), 2048); |
| EXPECT_EQ(result.get_range_from(0), 1024); |
| EXPECT_EQ(result.get_range_to(0), 3072); |
| } |
| |
| // Range that doesn't start at 0 (from page index filtering). |
| TEST_F(FilterRangesByCacheTest, RangeNotStartingAtZero) { |
| // Range [2048, 6144) = 4096 rows, first_row = 0 |
| // Granule 0 (false): covers rg-relative [0, 2048) — no overlap with [2048, 6144) |
| // Granule 1 (true): covers rg-relative [2048, 4096) — kept |
| // Beyond cache: [4096, 6144) kept conservatively |
| RowRanges ranges; |
| ranges.add(RowRange(2048, 6144)); |
| std::vector<bool> cache = {false, true}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| // False granule [0, 2048) doesn't overlap [2048, 6144), so nothing is filtered |
| EXPECT_EQ(result.range_size(), 1); |
| EXPECT_EQ(result.count(), 4096); |
| EXPECT_EQ(result.get_range_from(0), 2048); |
| EXPECT_EQ(result.get_range_to(0), 6144); |
| } |
| |
| // Multiple non-contiguous ranges (from page index filtering) with a single-entry cache. |
| TEST_F(FilterRangesByCacheTest, NonContiguousRangesGranuleSpansGap) { |
| // Ranges: [0, 1000), [5000, 6000) = 2000 total rows, first_row = 0 |
| // Granule 0 covers rg-relative [0, 2048) — only overlaps [0, 1000) |
| // [5000, 6000) is in granule 2 ([4096, 6144)) which is beyond cache -> kept conservatively |
| RowRanges ranges; |
| ranges.add(RowRange(0, 1000)); |
| ranges.add(RowRange(5000, 6000)); |
| |
| // Granule 0 = false -> discard [0, 1000); [5000, 6000) kept (beyond cache) |
| std::vector<bool> cache = {false}; |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| EXPECT_EQ(result.count(), 1000); |
| EXPECT_EQ(result.range_size(), 1); |
| EXPECT_EQ(result.get_range_from(0), 5000); |
| EXPECT_EQ(result.get_range_to(0), 6000); |
| |
| // Granule 0 = true -> keep all |
| std::vector<bool> cache2 = {true}; |
| result = RowGroupReader::filter_ranges_by_cache(ranges, cache2, 0); |
| EXPECT_EQ(result.count(), 2000); |
| EXPECT_EQ(result.range_size(), 2); |
| } |
| |
| // Non-contiguous ranges where granule boundaries fall within ranges. |
| TEST_F(FilterRangesByCacheTest, NonContiguousRangesMultipleGranules) { |
| // Ranges: [0, 3000), [8000, 11000) = 6000 total rows, first_row = 0 |
| // Granule 0 (false): rg-relative [0, 2048) — overlaps [0, 2048) of first range |
| // Granule 1 (true): rg-relative [2048, 4096) — overlaps [2048, 3000) of first range |
| // Granule 2 (false): rg-relative [4096, 6144) — no overlap with either range |
| // [8000, 11000) is in granules 3-5, all beyond cache -> kept conservatively |
| RowRanges ranges; |
| ranges.add(RowRange(0, 3000)); |
| ranges.add(RowRange(8000, 11000)); |
| |
| // granule 0=false, 1=true, 2=false |
| std::vector<bool> cache = {false, true, false}; |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| // Granule 0 removes [0, 2048) from [0, 3000) -> [2048, 3000) kept |
| // Granule 2 [4096, 6144) doesn't overlap [8000, 11000) -> [8000, 11000) kept |
| EXPECT_EQ(result.range_size(), 2); |
| EXPECT_EQ(result.get_range_from(0), 2048); |
| EXPECT_EQ(result.get_range_to(0), 3000); |
| EXPECT_EQ(result.get_range_from(1), 8000); |
| EXPECT_EQ(result.get_range_to(1), 11000); |
| EXPECT_EQ(result.count(), (3000 - 2048) + (11000 - 8000)); // 952 + 3000 = 3952 |
| } |
| |
| // Cache smaller than the actual row range -> out-of-range granules kept conservatively. |
| TEST_F(FilterRangesByCacheTest, CacheSmallerThanRange) { |
| // 4 granules of rows, cache only covers 2 |
| RowRanges ranges; |
| ranges.add(RowRange(0, 4 * GS)); |
| std::vector<bool> cache = {false, true}; // only 2 entries |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| // Granule 0 = false -> skip; granule 1 = true -> keep |
| // Granule 2, 3 beyond cache -> kept conservatively |
| EXPECT_EQ(result.range_size(), 1); // [GS, 4*GS) merged since granules 1,2,3 all kept |
| EXPECT_EQ(result.count(), 3 * GS); |
| EXPECT_EQ(result.get_range_from(0), GS); |
| EXPECT_EQ(result.get_range_to(0), 4 * GS); |
| } |
| |
| // Partial granule at the end of a range. |
| TEST_F(FilterRangesByCacheTest, PartialGranuleAtEnd) { |
| // Range [0, 3000) = 3000 rows, first_row = 0 |
| // Granule 0: seq [0, 2048) -> [0, 2048) |
| // Granule 1: seq [2048, 3000) -> [2048, 3000) (partial, only 952 rows) |
| RowRanges ranges; |
| ranges.add(RowRange(0, 3000)); |
| std::vector<bool> cache = {true, false}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| // Only granule 0 kept |
| EXPECT_EQ(result.range_size(), 1); |
| EXPECT_EQ(result.count(), GS); |
| EXPECT_EQ(result.get_range_from(0), 0); |
| EXPECT_EQ(result.get_range_to(0), GS); |
| } |
| |
| // Empty ranges input. |
| TEST_F(FilterRangesByCacheTest, EmptyRanges) { |
| RowRanges ranges; |
| std::vector<bool> cache = {true, false, true}; |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0); |
| |
| EXPECT_EQ(result.range_size(), 0); |
| EXPECT_EQ(result.count(), 0); |
| } |
| |
| // Large first_row offset (simulating second row group in file). |
| TEST_F(FilterRangesByCacheTest, LargeFirstRowOffset) { |
| // first_row = 100000 (second row group starts here) |
| // Range [0, 2048) = one full granule |
| // global_seq = 100000 + 0 = 100000, granule = 100000/2048 = 48 |
| int64_t first_row = 100000; |
| RowRanges ranges; |
| ranges.add(RowRange(0, GS)); |
| std::vector<bool> cache(50, false); // 50 granules, all false |
| cache[48] = true; // granule 48 = true |
| |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, first_row); |
| |
| // global_seq for first chunk: 100000, granule 48 = true -> keep |
| // But first chunk may not be the full range if 100000 is not aligned... |
| // 100000 / 2048 = 48, 49*2048 = 100352, so rows_to_granule_end = 100352 - 100000 = 352 |
| // chunk 1: [0, 352) -> granule 48 = true -> keep |
| // chunk 2: [352, 2048) -> global_seq = 100352 -> granule 49 = false -> discard |
| EXPECT_EQ(result.count(), 352); |
| EXPECT_EQ(result.range_size(), 1); |
| EXPECT_EQ(result.get_range_from(0), 0); |
| EXPECT_EQ(result.get_range_to(0), 352); |
| } |
| |
| // ============================================================ |
| // Tests for cache vector pre-allocation with +1 safety margin |
| // when first_row is not aligned to granule boundary. |
| // ============================================================ |
| |
// Fixture for the "+1 pre-allocation" coverage tests; holds no shared state.
class CachePreAllocTest : public testing::Test {};
| |
| // When first_row is not aligned to granule boundary, pre-allocating |
| // ceil(total_rows / GS) + 1 guarantees coverage of all granules. |
| TEST_F(CachePreAllocTest, PlusOneCoversUnalignedFirstRow) { |
| // Simulates: Scanner reads RG2+RG3 of a file: |
| // RG2: 5000 rows (row 20000~24999), RG3: 5000 rows (row 25000~29999) |
| // first_assigned_row = 20000, total_rows = 10000 |
| // |
| // Pre-allocation: ceil(10000 / 2048) + 1 = 5 + 1 = 6 |
| // base_granule = 20000 / 2048 = 9 |
| // last_granule = ceil(30000 / 2048) = 15 |
| // needed = 15 - 9 = 6 <= 6 (pre-allocated) → sufficient! |
| constexpr int64_t GS = ConditionCacheContext::GRANULE_SIZE; // 2048 |
| int64_t first_assigned_row = 20000; |
| int64_t total_rows = 10000; |
| |
| // Step 1: simulate pre-allocation with +1 (as FileScanner now does) |
| int64_t pre_alloc = (total_rows + GS - 1) / GS + 1; // ceil(10000/2048) + 1 = 6 |
| EXPECT_EQ(pre_alloc, 6); |
| std::vector<bool> cache(pre_alloc, false); |
| |
| // Step 2: compute base_granule (as set_condition_cache_context does) |
| int64_t base_granule = first_assigned_row / GS; // 9 |
| EXPECT_EQ(base_granule, 9); |
| |
| // Step 3: verify all granules are coverable |
| int64_t last_granule = (first_assigned_row + total_rows + GS - 1) / GS; // 15 |
| int64_t needed = last_granule - base_granule; // 6 |
| EXPECT_LE(static_cast<size_t>(needed), cache.size()); |
| |
| // Step 4: mark all granules as true and verify HIT keeps all rows |
| cache.assign(cache.size(), true); |
| RowRanges ranges; |
| ranges.add(RowRange(0, total_rows)); |
| auto result = |
| RowGroupReader::filter_ranges_by_cache(ranges, cache, first_assigned_row, base_granule); |
| EXPECT_EQ(result.count(), total_rows); |
| } |
| |
| // Verify the last granule (cache_idx = needed-1) is reachable after +1 allocation. |
| TEST_F(CachePreAllocTest, LastGranuleIsReachable) { |
| constexpr int64_t GS = ConditionCacheContext::GRANULE_SIZE; |
| int64_t first_assigned_row = 20000; |
| int64_t total_rows = 10000; |
| |
| int64_t pre_alloc = (total_rows + GS - 1) / GS + 1; |
| std::vector<bool> cache(pre_alloc, false); |
| int64_t base_granule = first_assigned_row / GS; |
| |
| // Simulate marking from _mark_condition_cache_granules for a row |
| // in the last granule. E.g., global row 29000 |
| int64_t global_row = 29000; |
| size_t granule = global_row / GS; // 29000 / 2048 = 14 |
| size_t cache_idx = granule - base_granule; // 14 - 9 = 5 |
| |
| // Without +1, cache.size() would be 5 and cache_idx=5 would be out of bounds |
| // With +1, cache.size() is 6 and cache_idx=5 is valid |
| EXPECT_LT(cache_idx, cache.size()); |
| cache[cache_idx] = true; |
| EXPECT_TRUE(cache[cache_idx]); |
| } |
| |
| // Verify +1 is sufficient for various misaligned first_row values. |
| TEST_F(CachePreAllocTest, PlusOneSufficientForVariousMisalignments) { |
| constexpr int64_t GS = ConditionCacheContext::GRANULE_SIZE; |
| |
| struct TestCase { |
| int64_t first_row; |
| int64_t total_rows; |
| }; |
| std::vector<TestCase> cases = { |
| {.first_row = 20000, .total_rows = 10000}, // original example |
| {.first_row = 1024, .total_rows = 4096}, // small offset |
| {.first_row = 3000, .total_rows = 8000}, // another misalignment |
| {.first_row = 2047, .total_rows = 2049}, // just before boundary, spans 3 granules |
| {.first_row = 0, .total_rows = 10000}, // aligned (+1 is wasted but harmless) |
| {.first_row = 1, .total_rows = 2048}, // off-by-one at start |
| {.first_row = 100000, .total_rows = 10000}, // large offset (second row group) |
| }; |
| |
| for (auto& tc : cases) { |
| int64_t last_row = tc.first_row + tc.total_rows; |
| int64_t pre_alloc = (tc.total_rows + GS - 1) / GS + 1; |
| int64_t base_granule = tc.first_row / GS; |
| int64_t last_granule = (last_row + GS - 1) / GS; |
| int64_t needed = last_granule - base_granule; |
| |
| EXPECT_LE(needed, pre_alloc) |
| << "first_row=" << tc.first_row << " total_rows=" << tc.total_rows; |
| |
| // Verify HIT correctness |
| std::vector<bool> cache(pre_alloc, true); |
| RowRanges ranges; |
| ranges.add(RowRange(0, tc.total_rows)); |
| auto result = |
| RowGroupReader::filter_ranges_by_cache(ranges, cache, tc.first_row, base_granule); |
| EXPECT_EQ(result.count(), tc.total_rows) |
| << "first_row=" << tc.first_row << " total_rows=" << tc.total_rows; |
| } |
| } |
| |
| // Extra +1 element beyond actual data range doesn't cause incorrect filtering. |
| TEST_F(CachePreAllocTest, ExtraElementDoesNotCauseIncorrectFiltering) { |
| // Aligned case: first_row=0, total_rows=4096 (exactly 2 granules) |
| // Pre-alloc = 2 + 1 = 3. The 3rd element (cache[2]) is beyond data range. |
| std::vector<bool> cache = {true, true, false}; // extra false at end |
| |
| RowRanges ranges; |
| ranges.add(RowRange(0, 4096)); |
| auto result = RowGroupReader::filter_ranges_by_cache(ranges, cache, 0, 0); |
| |
| // The extra false granule covers rg-relative [4096, 6144) which doesn't |
| // overlap [0, 4096), so all rows should be kept. |
| EXPECT_EQ(result.count(), 4096); |
| } |
| |
| // ============================================================ |
| // Mock / Testable reader classes |
| // ============================================================ |
| |
| // GenericReader whose has_delete_operations() result is configurable, |
| // used to test condition cache skip logic for various delete scenarios. |
| class MockFileFormatReader : public GenericReader { |
| public: |
| bool mock_has_deletes = false; |
| Status _do_get_next_block(Block*, size_t*, bool*) override { return Status::OK(); } |
| bool has_delete_operations() const override { return mock_has_deletes; } |
| }; |
| // ============================================================ |
| // These tests reproduce the logic from |
| // FileScanner::_init_reader_condition_cache() (file_scanner.cpp) |
| // using real ConditionCache + real reader instances. |
| // ============================================================ |
| |
// Fixture that owns a real ConditionCache instance and re-implements the
// scanner's cache-initialization decision logic so the skip conditions
// (zero digest, delete operations, missing cache) can be tested in isolation.
class ConditionCacheDeleteOpsTest : public testing::Test {
protected:
    void SetUp() override {
        // Creates a fresh global cache per test; arguments are presumably
        // capacity in bytes (10 MB) and shard count (4) — TODO confirm against
        // ConditionCache::create_global_cache's declaration.
        _cache.reset(segment_v2::ConditionCache::create_global_cache(10 * 1024 * 1024, 4));
    }

    void TearDown() override { _cache.reset(); }

    // Reproduces the exact logic from FileScanner::_init_reader_condition_cache().
    // Returns whether the condition cache context was created (i.e. cache was not skipped).
    // On skip, all three out-params are left as {false, nullptr, nullptr}.
    void simulate_init_condition_cache(GenericReader* reader, uint64_t digest,
                                       const std::string& path,
                                       /*out*/ bool& cache_hit,
                                       /*out*/ std::shared_ptr<std::vector<bool>>& cache,
                                       /*out*/ std::shared_ptr<ConditionCacheContext>& ctx) {
        cache_hit = false;
        cache = nullptr;
        ctx = nullptr;

        // Mirrors: if (_condition_cache_digest == 0 || _is_load) return;
        if (digest == 0) {
            return;
        }

        // Mirrors: if (_cur_reader && _cur_reader->has_delete_operations()) return;
        // Deletes invalidate cached filter results, so the cache must be bypassed.
        if (reader && reader->has_delete_operations()) {
            return;
        }

        auto* cc = _cache.get();
        if (cc == nullptr) {
            return;
        }

        // Key fields beyond path/digest use sentinel values (-1, 0, 0, -1);
        // presumably unused identifiers in this external-table scenario —
        // TODO confirm against ExternalCacheKey's constructor.
        segment_v2::ConditionCache::ExternalCacheKey key(path, -1, 0, digest, 0, -1);

        segment_v2::ConditionCacheHandle handle;
        cache_hit = cc->lookup(key, &handle);
        if (cache_hit) {
            // HIT: reuse the cached per-granule filter result.
            cache = handle.get_filter_result();
        } else {
            // MISS: start with an empty vector to be filled during the scan.
            cache = std::make_shared<std::vector<bool>>();
        }

        ctx = std::make_shared<ConditionCacheContext>();
        ctx->is_hit = cache_hit;
        ctx->filter_result = cache;
    }

    // Inserts a pre-populated entry into the cache for the given path/digest.
    // The 3-granule filter {true, false, true} lets tests recognize a HIT by size.
    void prepopulate_cache(const std::string& path, uint64_t digest) {
        segment_v2::ConditionCache::ExternalCacheKey key(path, -1, 0, digest, 0, -1);
        auto filter = std::make_shared<std::vector<bool>>(std::vector<bool> {true, false, true});
        _cache->insert(key, filter);
    }

    // Per-test cache instance; reset between tests so entries never leak across.
    std::unique_ptr<segment_v2::ConditionCache> _cache;
};
| |
| // -- ParquetReader: no deletes -> cache populated (MISS) -- |
| TEST_F(ConditionCacheDeleteOpsTest, ParquetNoDeletes_CachePopulated) { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = ParquetReader::create_unique(params, range, nullptr, nullptr); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 42, "/data/file.parquet", hit, cache, ctx); |
| |
| EXPECT_FALSE(hit); |
| EXPECT_NE(ctx, nullptr); |
| EXPECT_NE(cache, nullptr); |
| EXPECT_FALSE(ctx->is_hit); |
| } |
| |
| // -- ParquetReader: with position deletes -> cache skipped -- |
| TEST_F(ConditionCacheDeleteOpsTest, ParquetWithPositionDeletes_CacheSkipped) { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = ParquetReader::create_unique(params, range, nullptr, nullptr); |
| std::vector<int64_t> deletes = {1, 5, 10}; |
| reader->set_delete_rows(&deletes); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 42, "/data/file.parquet", hit, cache, ctx); |
| |
| EXPECT_EQ(ctx, nullptr); |
| EXPECT_EQ(cache, nullptr); |
| } |
| |
| // -- OrcReader: no deletes -> cache populated (MISS) -- |
| TEST_F(ConditionCacheDeleteOpsTest, OrcNoDeletes_CachePopulated) { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 99, "/data/file.orc", hit, cache, ctx); |
| |
| EXPECT_FALSE(hit); |
| EXPECT_NE(ctx, nullptr); |
| EXPECT_NE(cache, nullptr); |
| EXPECT_FALSE(ctx->is_hit); |
| } |
| |
| // -- OrcReader: with position deletes -> cache skipped -- |
| TEST_F(ConditionCacheDeleteOpsTest, OrcWithPositionDeletes_CacheSkipped) { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr); |
| std::vector<int64_t> pos_deletes = {0, 3, 7}; |
| reader->set_position_delete_rowids(&pos_deletes); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 99, "/data/file.orc", hit, cache, ctx); |
| |
| EXPECT_EQ(ctx, nullptr); |
| EXPECT_EQ(cache, nullptr); |
| } |
| |
| // -- OrcReader: with ACID deletes -> cache skipped -- |
| TEST_F(ConditionCacheDeleteOpsTest, OrcWithAcidDeletes_CacheSkipped) { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr); |
| AcidRowIDSet acid_deletes; |
| acid_deletes.insert({1, 0, 5}); |
| reader->set_delete_rows(&acid_deletes); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 99, "/data/file.orc", hit, cache, ctx); |
| |
| EXPECT_EQ(ctx, nullptr); |
| EXPECT_EQ(cache, nullptr); |
| } |
| |
| // -- MockReader: with deletes (simulating Iceberg/Hive with inner deletes) -> cache skipped -- |
| // In the new architecture, Iceberg readers inherit ParquetReader/OrcReader directly (CRTP), |
| // so has_delete_operations() is resolved through the base reader. We use MockFileFormatReader |
| // to test the generic condition cache skip logic. |
| TEST_F(ConditionCacheDeleteOpsTest, ReaderWithDeletes_CacheSkipped) { |
| auto reader = std::make_unique<MockFileFormatReader>(); |
| reader->mock_has_deletes = true; |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 42, "/data/iceberg.parquet", hit, cache, ctx); |
| |
| EXPECT_EQ(ctx, nullptr); |
| EXPECT_EQ(cache, nullptr); |
| } |
| |
| // -- MockReader: no deletes -> cache populated -- |
| TEST_F(ConditionCacheDeleteOpsTest, ReaderWithoutDeletes_CachePopulated) { |
| auto reader = std::make_unique<MockFileFormatReader>(); |
| reader->mock_has_deletes = false; |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 42, "/data/iceberg.parquet", hit, cache, ctx); |
| |
| EXPECT_FALSE(hit); |
| EXPECT_NE(ctx, nullptr); |
| EXPECT_NE(cache, nullptr); |
| EXPECT_FALSE(ctx->is_hit); |
| } |
| |
| // -- Pre-populated cache entry is NOT returned when deletes exist -- |
| TEST_F(ConditionCacheDeleteOpsTest, CacheHitSkippedWhenDeletesExist) { |
| const std::string path = "/data/cached_file.parquet"; |
| const uint64_t digest = 123; |
| |
| // Insert a cache entry |
| prepopulate_cache(path, digest); |
| |
| // Verify it would be a hit without deletes |
| { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = ParquetReader::create_unique(params, range, nullptr, nullptr); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), digest, path, hit, cache, ctx); |
| |
| EXPECT_TRUE(hit); |
| EXPECT_NE(ctx, nullptr); |
| EXPECT_TRUE(ctx->is_hit); |
| EXPECT_NE(cache, nullptr); |
| EXPECT_EQ(cache->size(), 3); |
| } |
| |
| // Now with deletes: cache entry should NOT be returned |
| { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = ParquetReader::create_unique(params, range, nullptr, nullptr); |
| std::vector<int64_t> deletes = {1, 2, 3}; |
| reader->set_delete_rows(&deletes); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), digest, path, hit, cache, ctx); |
| |
| EXPECT_EQ(ctx, nullptr); |
| EXPECT_EQ(cache, nullptr); |
| EXPECT_FALSE(hit); |
| } |
| } |
| |
| // -- Zero digest always skips cache, even without deletes -- |
| TEST_F(ConditionCacheDeleteOpsTest, ZeroDigest_CacheAlwaysSkipped) { |
| TFileScanRangeParams params; |
| TFileRangeDesc range; |
| auto reader = ParquetReader::create_unique(params, range, nullptr, nullptr); |
| |
| bool hit = false; |
| std::shared_ptr<std::vector<bool>> cache; |
| std::shared_ptr<ConditionCacheContext> ctx; |
| simulate_init_condition_cache(reader.get(), 0, "/data/file.parquet", hit, cache, ctx); |
| |
| EXPECT_EQ(ctx, nullptr); |
| EXPECT_EQ(cache, nullptr); |
| EXPECT_FALSE(hit); |
| } |
| |
| } // namespace doris::vectorized |