blob: cd2abb778a9b291937c395e90751c93b3247a1e6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <butil/macros.h>
#include <gen_cpp/segment_v2.pb.h>
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <string>
#include <utility>
#include "olap/lru_cache.h"
#include "runtime/memory/lru_cache_policy.h"
#include "runtime/memory/mem_tracker_limiter.h"
#include "util/slice.h"
#include "vec/common/allocator.h"
#include "vec/common/allocator_fwd.h"
namespace doris {
// Forward declaration: StoragePageCache::lookup() and StoragePageCache::insert()
// take a PageCacheHandle*, but the class itself is defined at the end of this header.
class PageCacheHandle;
// Common base class for the values stored in the page cache.
//
// Holds the page payload handle (`_data`, of type T), its logical size and a
// shared reference to a MemTrackerLimiter. In this header T is instantiated as
// `char*` (MemoryTrackedPageWithPageEntity) and as `std::shared_ptr<...>`
// (MemoryTrackedPageWithPagePtr). Instances are not copyable.
template <typename T>
class MemoryTrackedPageBase : public LRUCacheValueBase {
public:
    MemoryTrackedPageBase() = default;
    // Defined out of line.
    // NOTE(review): `b` is presumably the page size in bytes and `use_cache`
    // presumably selects which memory tracker is used -- confirm in the .cpp.
    MemoryTrackedPageBase(size_t b, bool use_cache, segment_v2::PageTypePB page_type);

    MemoryTrackedPageBase(const MemoryTrackedPageBase&) = delete;
    MemoryTrackedPageBase& operator=(const MemoryTrackedPageBase&) = delete;
    ~MemoryTrackedPageBase() = default;

    // Returns the payload handle by value (a pointer / shared_ptr copy, not a
    // copy of the page contents). Const so it can be used on const pages.
    T data() const { return _data; }

    // Returns the current logical size of the page.
    size_t size() const { return _size; }

protected:
    // Value-initialized so that a raw-pointer payload is never left
    // indeterminate after the defaulted constructor.
    T _data {};
    size_t _size = 0;
    // NOTE(review): presumably the tracker that accounts for this page's
    // memory; it is only declared here, not used in this header.
    std::shared_ptr<MemTrackerLimiter> _mem_tracker_by_allocator;
};
// A cache page whose payload is a raw `char*` buffer.
//
// Privately inherits Allocator<false>. The constructor and destructor are
// defined out of line.
// NOTE(review): presumably they allocate and free the buffer through that
// allocator -- confirm in the .cpp.
class MemoryTrackedPageWithPageEntity : Allocator<false>, public MemoryTrackedPageBase<char*> {
public:
    MemoryTrackedPageWithPageEntity(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
    ~MemoryTrackedPageWithPageEntity() override;

    // Returns the capacity recorded for this page. Const so it can be
    // queried on const pages.
    size_t capacity() const { return _capacity; }

    // Sets the logical size of the page to `n`.
    // `n` must not exceed capacity(); this is only checked via DCHECK.
    void reset_size(size_t n) {
        DCHECK(n <= _capacity);
        this->_size = n;
    }

private:
    size_t _capacity = 0;
};
// A cache page whose payload is a `std::shared_ptr<T>`.
//
// The constructor and destructor are defined out of line.
// NOTE(review): `b` is presumably the size to account for the pointed-to
// object -- confirm in the .cpp.
template <typename T>
class MemoryTrackedPageWithPagePtr : public MemoryTrackedPageBase<std::shared_ptr<T>> {
public:
    MemoryTrackedPageWithPagePtr(size_t b, segment_v2::PageTypePB page_type);
    ~MemoryTrackedPageWithPagePtr() override;

    // Stores `data` as this page's payload. The parameter is taken by value
    // (sink parameter) and moved into place, so no extra reference-count
    // increment/decrement is performed.
    void set_data(std::shared_ptr<T> data) { this->_data = std::move(data); }
};
// Cache page holding a std::shared_ptr to a segment_v2::SegmentFooterPB.
// NOTE(review): "Semgnet" looks like a misspelling of "Segment"; the alias name
// is kept unchanged here because code outside this header may refer to it.
using SemgnetFooterPBPage = MemoryTrackedPageWithPagePtr<segment_v2::SegmentFooterPB>;
// Cache page holding a raw char buffer.
using DataPage = MemoryTrackedPageWithPageEntity;
// Wrapper around Cache, used to cache pages of column data in Segment.
//
// Internally owns three separate LRU caches (data pages, index pages and
// primary-key index pages); the cache used for an operation is selected by
// the segment_v2::PageTypePB argument.
// TODO(zc): We should add some metric to see cache hit/miss rate.
class StoragePageCache {
public:
    // The unique key identifying entries in the page cache.
    // Each cached page corresponds to a specific offset within
    // a file.
    //
    // TODO(zc): Now we use file name(std::string) as a part of
    // key, which is not efficient. We should make it better later
    struct CacheKey {
        CacheKey(std::string fname_, size_t fsize_, int64_t offset_)
                : fname(std::move(fname_)), fsize(fsize_), offset(offset_) {}
        std::string fname;
        size_t fsize;
        int64_t offset;

        // Encode to a flat binary string which can be used as LRUCache's key.
        // Layout: the bytes of `fname`, followed by the raw in-memory bytes of
        // `fsize`, followed by the raw in-memory bytes of `offset`.
        std::string encode() const {
            std::string key_buf;
            // Final length is known up front, so allocate once.
            key_buf.reserve(fname.size() + sizeof(fsize) + sizeof(offset));
            key_buf.append(fname);
            // Named casts to `const char*`: this is a const method, so the
            // members must not be viewed through a non-const pointer.
            key_buf.append(reinterpret_cast<const char*>(&fsize), sizeof(fsize));
            key_buf.append(reinterpret_cast<const char*>(&offset), sizeof(offset));
            return key_buf;
        }
    };

    // LRU cache for data pages. Passes an explicit element-count capacity and
    // two `true` flags to LRUCachePolicy; see LRUCachePolicy's constructor for
    // their meaning.
    class DataPageCache : public LRUCachePolicy {
    public:
        DataPageCache(size_t capacity, uint32_t num_shards)
                : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
                                 LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
                                 num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
        }
    };

    // LRU cache for index pages.
    class IndexPageCache : public LRUCachePolicy {
    public:
        IndexPageCache(size_t capacity, uint32_t num_shards)
                : LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity,
                                 LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec,
                                 num_shards) {}
    };

    // LRU cache for primary-key index pages.
    class PKIndexPageCache : public LRUCachePolicy {
    public:
        PKIndexPageCache(size_t capacity, uint32_t num_shards)
                : LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity,
                                 LRUCacheType::SIZE,
                                 config::pk_index_page_cache_stale_sweep_time_sec, num_shards) {}
    };

    // Shard count used by create_global_cache() when none is given.
    static constexpr uint32_t kDefaultNumShards = 16;

    // Create global instance of this class
    static StoragePageCache* create_global_cache(size_t capacity, int32_t index_cache_percentage,
                                                 int64_t pk_index_cache_capacity,
                                                 uint32_t num_shards = kDefaultNumShards);

    // Return global instance.
    // Client should call create_global_cache before.
    static StoragePageCache* instance() { return ExecEnv::GetInstance()->get_storage_page_cache(); }

    StoragePageCache(size_t capacity, int32_t index_cache_percentage,
                     int64_t pk_index_cache_capacity, uint32_t num_shards);

    // Lookup the given page in the cache.
    //
    // If the page is found, the cache entry will be written into handle.
    // PageCacheHandle will release cache entry to cache when it
    // destructs.
    //
    // Cache type selection is determined by page_type argument
    //
    // Return true if entry is found, otherwise return false.
    bool lookup(const CacheKey& key, PageCacheHandle* handle, segment_v2::PageTypePB page_type);

    // Insert a page with key into this cache.
    // Given handle will be set to valid reference.
    // This function is thread-safe, and when two clients insert two same key
    // concurrently, this function can assure that only one page is cached.
    // The in_memory page will have higher priority.
    void insert(const CacheKey& key, DataPage* data, PageCacheHandle* handle,
                segment_v2::PageTypePB page_type, bool in_memory = false);

    // Insert a std::shared_ptr which points to a page into this cache.
    // size should be the size of the page instead of shared_ptr.
    // Internal implementation will wrap shared_ptr with MemoryTrackedPageWithPagePtr
    // Since we are using std::shared_ptr, the life cycle of the page is not managed by
    // this cache alone.
    // User could store a weak_ptr to the page, and lock it when needed.
    // See Segment::_get_segment_footer for example.
    template <typename T>
    void insert(const CacheKey& key, T data, size_t size, PageCacheHandle* handle,
                segment_v2::PageTypePB page_type, bool in_memory = false);

    // Returns the memory tracker of the cache selected by page_type.
    // Throws (via _get_page_cache) if page_type has no associated cache.
    std::shared_ptr<MemTrackerLimiter> mem_tracker(segment_v2::PageTypePB page_type) {
        return _get_page_cache(page_type)->mem_tracker();
    }

private:
    StoragePageCache();

    int32_t _index_cache_percentage = 0;
    std::unique_ptr<DataPageCache> _data_page_cache;
    std::unique_ptr<IndexPageCache> _index_page_cache;
    // Cache for primary key index data pages, separated from the data
    // page cache to make it more flexible. We need this cache when constructing
    // the delete bitmap in unique key with mow
    std::unique_ptr<PKIndexPageCache> _pk_index_page_cache;

    // Maps a page type to the cache that stores pages of that type.
    // Returns the raw pointer held by the corresponding unique_ptr member.
    // Throws Exception for any page type other than DATA_PAGE, INDEX_PAGE
    // and PRIMARY_KEY_INDEX_PAGE.
    LRUCachePolicy* _get_page_cache(segment_v2::PageTypePB page_type) {
        switch (page_type) {
        case segment_v2::DATA_PAGE: {
            return _data_page_cache.get();
        }
        case segment_v2::INDEX_PAGE: {
            return _index_page_cache.get();
        }
        case segment_v2::PRIMARY_KEY_INDEX_PAGE: {
            return _pk_index_page_cache.get();
        }
        default:
            throw Exception(Status::FatalError("get error type page cache"));
        }
        // Not reachable: every path through the switch above returns or throws.
        throw Exception(Status::FatalError("__builtin_unreachable"));
    }
};
// A handle for StoragePageCache entry. This class make it easy to handle
// Cache entry. Users don't need to release the obtained cache entry. This
// class will release the cache entry when it is destroyed.
class PageCacheHandle {
public:
PageCacheHandle() = default;
PageCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle)
: _cache(cache), _handle(handle) {}
~PageCacheHandle() {
if (_handle != nullptr) {
_cache->release(_handle);
}
}
PageCacheHandle(PageCacheHandle&& other) noexcept {
// we can use std::exchange if we switch c++14 on
std::swap(_cache, other._cache);
std::swap(_handle, other._handle);
}
PageCacheHandle& operator=(PageCacheHandle&& other) noexcept {
std::swap(_cache, other._cache);
std::swap(_handle, other._handle);
return *this;
}
LRUCachePolicy* cache() const { return _cache; }
Slice data() const;
template <typename T>
T get() const {
static_assert(std::is_same<typename std::remove_cv<T>::type,
std::shared_ptr<typename T::element_type>>::value,
"T must be a std::shared_ptr");
using ValueType = typename T::element_type; // Type that shared_ptr points to
MemoryTrackedPageWithPagePtr<ValueType>* page =
(MemoryTrackedPageWithPagePtr<ValueType>*)_cache->value(_handle);
return page->data();
}
private:
LRUCachePolicy* _cache = nullptr;
Cache::Handle* _handle = nullptr;
// Don't allow copy and assign
DISALLOW_COPY_AND_ASSIGN(PageCacheHandle);
};
} // namespace doris