blob: 659f80c3b0d40dacf2621d9866f64b20ea0ca756 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <roaring/roaring.hh>
#include <vector>
#include "common/status.h"
#include "storage/segment/common.h"
namespace doris {
namespace io {
class FileReader;
} // namespace io
class StorageReadOptions;
namespace segment_v2 {
class OrdinalIndexReader;
class ColumnReader;
enum class PrefetcherInitMethod : int { FROM_ROWIDS = 0, ALL_DATA_BLOCKS = 1 };
// Configuration for segment prefetcher
struct SegmentPrefetcherConfig {
// Number of file cache blocks to prefetch ahead
size_t prefetch_window_size = 4;
// File cache block size in bytes (default 1MB)
size_t block_size = 1024 * 1024;
SegmentPrefetcherConfig(size_t window_size, size_t blk_size)
: prefetch_window_size(window_size), block_size(blk_size) {}
};
// Block range representing [offset, offset + size) in the segment file
struct BlockRange {
uint64_t offset;
uint64_t size;
BlockRange(uint64_t off, uint64_t sz) : offset(off), size(sz) {}
bool operator==(const BlockRange& other) const {
return offset == other.offset && size == other.size;
}
};
// Represents a block with its first rowid for reading
struct BlockInfo {
size_t block_id;
rowid_t first_rowid;
BlockInfo(size_t bid, rowid_t rid) : block_id(bid), first_rowid(rid) {}
};
struct SegmentPrefetchParams {
SegmentPrefetcherConfig config;
const StorageReadOptions& read_options;
};
// SegmentPrefetcher maintains block sequence and triggers prefetch to keep
// N blocks ahead of current reading position.
//
// Key design:
// - Monotonic reading: rowids are read in order (forward or backward)
// - Trigger condition: when current_rowid reaches a block boundary, prefetch next N blocks
// - No deduplication needed: reading is monotonic, blocks are naturally processed in order
class SegmentPrefetcher {
public:
explicit SegmentPrefetcher(const SegmentPrefetcherConfig& config) : _config(config) {}
~SegmentPrefetcher() = default;
Status init(std::shared_ptr<ColumnReader> column_reader,
const StorageReadOptions& read_options);
bool need_prefetch(rowid_t current_rowid, std::vector<BlockRange>* out_ranges);
static void build_blocks_by_rowids(const roaring::Roaring& row_bitmap,
const std::vector<SegmentPrefetcher*>& prefetchers);
void begin_build_blocks_by_rowids();
void add_rowids(const rowid_t* rowids, uint32_t num);
void finish_build_blocks_by_rowids();
void build_all_data_blocks();
private:
// Parameters:
// row_bitmap: The complete bitmap of rowids to scan
// ordinal_index: Ordinal index reader (must be loaded)
//
// For forward reading: first_rowid is the first rowid we need to read in each block
// For backward reading: first_rowid is the last rowid we need to read in each block
// (since we read backwards, this is the first one we'll encounter)
void _build_block_sequence_from_bitmap(const roaring::Roaring& row_bitmap,
OrdinalIndexReader* ordinal_index);
size_t _offset_to_block_id(uint64_t offset) const { return offset / _config.block_size; }
BlockRange _block_id_to_range(size_t block_id) const {
return {block_id * _config.block_size, _config.block_size};
}
int window_size() const { return _prefetched_index - _current_block_index + 1; }
std::string debug_string() const {
return fmt::format(
"[internal state] _is_forward={}, _prefetched_index={}, _current_block_index={}, "
"window_size={}, block.size()={}, path={}",
_is_forward, _prefetched_index, _current_block_index, window_size(),
_block_sequence.size(), _path);
}
void reset_blocks();
private:
SegmentPrefetcherConfig _config;
std::string _path;
// Sequence of blocks with their first rowid (in reading order)
std::vector<BlockInfo> _block_sequence;
bool _is_forward = true;
int _prefetched_index = -1;
int _current_block_index = 0;
int page_idx = 0;
// For each page, track the first rowid we need to read
// For forward: the smallest rowid in this page
// For backward: the largest rowid in this page (first one we'll encounter when reading backwards)
size_t last_block_id = static_cast<size_t>(-1);
rowid_t current_block_first_rowid = 0;
OrdinalIndexReader* ordinal_index = nullptr;
};
} // namespace segment_v2
} // namespace doris