| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "util/cache/cache.h" |
| #include "util/cache/cache-internal.h" |
| |
| #include <atomic> |
| #include <memory> |
| #include <mutex> |
| #include <string> |
| #include <vector> |
| |
| #include <boost/atomic/atomic.hpp> |
| #include <boost/intrusive/list.hpp> |
| #include <boost/intrusive/list_hook.hpp> |
| #include <gflags/gflags.h> |
| #include <glog/logging.h> |
| |
| #include "common/status.h" |
| #include "gutil/mathlimits.h" |
| #include "gutil/strings/substitute.h" |
| #include "kudu/util/alignment.h" |
| #include "kudu/util/locks.h" |
| #include "kudu/util/malloc.h" |
| #include "kudu/util/mem_tracker.h" |
| #include "kudu/util/slice.h" |
| |
| DECLARE_double(cache_memtracker_approximation_ratio); |
DEFINE_double_hidden(lirs_unprotected_percentage, 5.00,
    "Percentage of the LIRS cache used for unprotected entries. This must be at least "
    "0.0 and less than 50.0.");
DEFINE_double_hidden(lirs_tombstone_multiple, 2.00,
    "Multiple of tombstone entries to resident entries for the LIRS cache. "
    "If there are 100 resident entries, a multiple of 2.0 would allow 200 tombstones. "
    "This is required to be at least 0.50.");
| |
| using std::string; |
| using std::vector; |
| |
| using boost::intrusive::member_hook; |
| using boost::intrusive::list_member_hook; |
| |
| using kudu::Slice; |
| |
| using strings::Substitute; |
| |
| namespace impala { |
| |
| namespace { |
| |
| typedef kudu::simple_spinlock MutexType; |
| |
| // This implements the Low Inter-reference Recency Set (LIRS) algorithm described in |
| // "LIRS: An Efficient Low Inter-reference Recency Set Replacement Policy to Improve |
| // Buffer Cache Performance" Song Jiang and Xiaodong Zhang 2002. |
| // |
| // LIRS is based on a combination of recency and reuse distance. Recency is the number |
| // of other accesses since the last access to a particular key. Reuse distance is |
// the number of accesses between the last access and the second-to-last access to a
// particular key. I.e. if we are now at access a2 and the last two accesses were at a0
// and a1, the reuse distance is a1-a0 and the recency is a2-a1:
| // a0------ reuse distance ---------a1-------------- recency ---------------- a2 (now) |
| // If the key has only been accessed once, its reuse distance is considered infinite. |
| // Recency provides information about what the next reuse distance will be. |
| // The intuition is that entries with a shorter reuse distance are more likely to be |
| // revisited than ones with longer reuse distance. |
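// As a small worked example (a hypothetical access trace, counting accesses from 1):
//   accesses:  A  B  C  A  B  D  A
//   position:  1  2  3  4  5  6  7
// At access 7, A was previously accessed at positions 1 and 4, so its reuse distance
// is 4-1 = 3 and its recency is 7-4 = 3. D has been accessed only once, so its reuse
// distance is infinite.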
| // |
// LIRS partitions the cache into two categories: LIR (low inter-reference recency)
// and HIR (high inter-reference recency), and it has three different types of entries:
| // LIR, HIR resident, HIR nonresident. For clarity, these are renamed to be |
| // PROTECTED (LIR), UNPROTECTED (HIR resident), and TOMBSTONE (HIR nonresident). |
| // PROTECTED entries have been seen more than once recently and are "protected" from |
| // eviction. A PROTECTED entry is only evicted after it transitions to being UNPROTECTED. |
| // UNPROTECTED entries are new or infrequently accessed and can be evicted directly. |
| // PROTECTED and UNPROTECTED entries occupy space in the cache and are accessible via |
| // Lookup(). TOMBSTONE entries are metadata-only entries that have already been evicted. |
// They are not accessible via Lookup() and they don't occupy space in the cache.
// The purpose of UNPROTECTED and TOMBSTONE entries is to provide a way to identify when
// there are new or previously infrequent entries that now have shorter inter-reference
// recency than existing PROTECTED entries. If an entry in the UNPROTECTED or TOMBSTONE
// categories is seen again and has a lower inter-reference recency than the oldest
// PROTECTED entry, the UNPROTECTED/TOMBSTONE entry is promoted to PROTECTED and the
| // oldest PROTECTED entry is demoted to UNPROTECTED. Entries that are not in the cache, |
| // either because they were never added or have been completely removed, have the |
// UNINITIALIZED state. Every handle starts as UNINITIALIZED and transitions back to
// UNINITIALIZED before being freed.
| // |
| // The cache capacity is split between the UNPROTECTED and PROTECTED categories. The size |
| // of the UNPROTECTED category is determined by the lirs_unprotected_percentage |
| // parameter, which is typically in the 1-5% range. All remaining space is used for |
| // PROTECTED entries. |
| // |
| // The implementation uses two linked lists: |
| // 1. The recency list contains all types of entries in the order in which they were last |
| // seen. The recency list maintains the invariant that the oldest entry is a PROTECTED |
| // entry. If a modification to the recency list results in the oldest entry (or |
| // entries) being UNPROTECTED or TOMBSTONE entries, they are removed. This guarantees |
| // that if an UNPROTECTED or TOMBSTONE entry is seen again while on the recency list, |
| // it has a reuse distance shorter than the oldest entry on the recency list, so it |
| // can be upgraded to a PROTECTED entry (potentially evicting the oldest PROTECTED |
| // entry). |
// 2. The unprotected list contains only UNPROTECTED entries. An entry can enter the
//    unprotected list either as a previously unseen entry or by being demoted from
//    the PROTECTED state. The UNPROTECTED entries
| // must fit within a fixed percentage of the cache, so when an entry is added to the |
| // unprotected list, other entries may be removed. Entries removed from the unprotected |
| // list are evicted from the cache and become TOMBSTONE entries. |
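//
// As a hypothetical illustration of the resulting transitions (assume a tiny shard
// whose protected area fits two entries and whose unprotected area fits one):
//   Insert(A), Insert(B) -> A and B become PROTECTED (protected space is available).
//   Insert(C)            -> C becomes UNPROTECTED (protected space is full).
//   Insert(D)            -> D becomes UNPROTECTED; C is pushed off the unprotected
//                           list, is evicted, and becomes a TOMBSTONE.
//   Insert(C) again      -> C's TOMBSTONE is still on the recency list, so C becomes
//                           PROTECTED and the oldest PROTECTED entry is demoted to
//                           UNPROTECTED.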
| |
| enum LIRSState : uint8_t { |
| UNINITIALIZED = 0, |
| PROTECTED = 1, |
| UNPROTECTED = 2, |
| TOMBSTONE = 3 |
| }; |
| |
| // LIRS has PROTECTED/UNPROTECTED entries that are resident (i.e. occupying cache |
| // capacity) as well as TOMBSTONE entries that are not resident (i.e. do not occupy cache |
| // capacity). DataResidency tracks whether an entry is cache resident or not. Entries |
| // always start and end as NOT_RESIDENT. Insert() transitions an entry from NOT_RESIDENT |
| // to RESIDENT. When an entry is evicted, it transitions to EVICTING, runs the eviction |
| // callback, then transitions to NOT_RESIDENT. |
| // |
| // This is distinct from the LIRSState, because eviction can only happen if there are no |
| // external references. A TOMBSTONE entry (which should be evicted) can still be RESIDENT |
| // if there is an external reference that has not been released. See the description of |
| // AtomicState for how DataResidency is used to synchronize eviction between threads. |
| enum DataResidency : uint8_t { |
| NOT_RESIDENT = 0, |
| RESIDENT = 1, |
| EVICTING = 2, |
| }; |
| |
| // The AtomicState contains all the state for an entry interacting with the LIRS |
// cache. A large number of the transitions operate on a single field in the
// struct (incrementing the reference count, changing the state, etc.), and they occur
// while holding the cache-wide mutex. For example, the LIRSState transitions always
// hold the mutex, and ref_count is only incremented while holding the mutex. This is
// an atomic struct to allow some codepaths such as Release() and CleanupThreadState()
// to avoid acquiring the mutex. Residency can be manipulated and ref_count can be
// decremented without holding a mutex.
| // |
| // All of the interesting state transitions involve reaching agreement about which thread |
| // should evict and/or free an entry. |
| // |
// Due to the existence of TOMBSTONE entries, eviction and freeing are separate
// actions. For eviction, every codepath that modifies the atomic state to an
// evictable state must determine if it should perform the eviction. The criterion
// for eviction is that (state = TOMBSTONE or state = UNINITIALIZED), ref_count = 0, and
| // residency = RESIDENT. If a thread is going to reach this state, it must instead set |
| // residency = EVICTING and perform the eviction. Setting residency = EVICTING makes |
| // it impossible for a different thread to reach an evictable state, so eviction will |
| // happen exactly once. |
| // |
// Which thread frees an entry is determined by which thread reaches
// state = UNINITIALIZED, ref_count = 0, residency = NOT_RESIDENT. The scenario that this
| // is designed to solve is this: |
| // Entry starts with state = TOMBSTONE, ref_count = 1, resident = RESIDENT |
| // Thread 1: calls Release(), decrementing ref_count to 0 and setting resident = EVICTING |
| // Thread 1: starts running CleanupThreadState() |
| // Thread 2: starts running ToUninitialized() |
| // There is a race between Thread 1 and Thread 2. |
| // If Thread 1 sets resident = NOT_RESIDENT first, Thread 2 performs free. |
| // If Thread 2 sets state = UNINITIALIZED first, Thread 1 performs free. |
| // Eviction is not assumed to be fast or instantaneous, so it is useful that Thread 2 |
| // does not need to wait for Thread 1 to complete eviction. |
| // |
// This struct is 32 bits to allow simple atomic operations and so that
// AtomicStateTransition is 64 bits.
| struct AtomicState { |
| // 'ref_count' is a count of the number of external references (i.e. handles that |
| // have been returned from Lookup() or Insert() but have not yet called Release()). |
  // This is not specific to LIRS; every cache algorithm needs an equivalent count.
| // This is strictly external references. LIRS does not count itself as a reference, |
| // because it has a separate state field to track this. So, if there are no external |
| // references, this is zero. In order for an entry to be evicted or freed, the |
| // ref_count must be zero. |
| uint16_t ref_count; |
| LIRSState state; |
| DataResidency residency; |
| }; |
| static_assert(sizeof(AtomicState) == 4, "AtomicState has unexpected size"); |
| |
| // Struct to represent a successful atomic transition with the before and after values |
| struct AtomicStateTransition { |
| AtomicStateTransition(AtomicState x, AtomicState y) |
| : before(x), after(y) {} |
| AtomicState before; |
| AtomicState after; |
| }; |
| static_assert(sizeof(AtomicStateTransition) == 8, |
| "AtomicStateTransition has unexpected size"); |
| |
| class LIRSHandle : public HandleBase { |
| public: |
  LIRSHandle(uint8_t* kv_ptr, const Slice& key, uint32_t hash, int val_len,
      size_t charge)
    : HandleBase(kv_ptr, key, hash, val_len, charge) {
| AtomicState init_state; |
| init_state.ref_count = 0; |
| init_state.state = UNINITIALIZED; |
| init_state.residency = NOT_RESIDENT; |
| atomic_state_.store(init_state); |
| } |
| |
| ~LIRSHandle() { |
| AtomicState destruct_state = atomic_state_.load(); |
| DCHECK_EQ(destruct_state.state, UNINITIALIZED); |
| DCHECK_EQ(destruct_state.ref_count, 0); |
| DCHECK_EQ(destruct_state.residency, NOT_RESIDENT); |
| } |
| |
| Cache::EvictionCallback* eviction_callback_; |
| // Recency list double-link |
| list_member_hook<> recency_list_hook_; |
| // Unprotected/Tombstone list double-link. |
| list_member_hook<> unprotected_tombstone_list_hook_; |
| |
| // The boost functionality is the same as the std library functionality. Using boost |
| // solves a linking issue when using UBSAN with dynamic linking. |
| // TODO: Switch back to std library atomics when this works. |
| boost::atomic<AtomicState> atomic_state_; |
| |
| // Since compare and swap can fail when there are concurrent modifications, this |
| // wraps some of the boilerplate compare and swap retry logic. This performs |
| // the provided lambda in a loop until the compare and swap succeeds. |
| // The lambda is required to mutate the AtomicState argument passed to it. |
| // This returns a structure containing the old atomic value and new atomic value. |
| AtomicStateTransition modify_atomic_state(const std::function<void(AtomicState&)>& fn) { |
| AtomicState old_atomic_state = atomic_state_.load(); |
| AtomicState new_atomic_state = old_atomic_state; |
| while (true) { |
| new_atomic_state = old_atomic_state; |
| // fn mutates new_atomic_state and must change something |
| fn(new_atomic_state); |
| DCHECK(old_atomic_state.ref_count != new_atomic_state.ref_count || |
| old_atomic_state.state != new_atomic_state.state || |
| old_atomic_state.residency != new_atomic_state.residency); |
| if (atomic_state_.compare_exchange_weak(old_atomic_state, new_atomic_state)) { |
| break; |
| } |
| } |
| return AtomicStateTransition(old_atomic_state, new_atomic_state); |
| } |
| |
| LIRSState state() const { |
| return atomic_state_.load().state; |
| } |
| |
| // The caller must be holding the mutex_. |
| void set_state(LIRSState old_state, LIRSState new_state) { |
| modify_atomic_state([old_state, new_state](AtomicState& cur_state) { |
| DCHECK_EQ(cur_state.state, old_state); |
| cur_state.state = new_state; |
| }); |
| } |
| |
| DataResidency residency() const { |
| return atomic_state_.load().residency; |
| } |
| |
| void set_resident() { |
| modify_atomic_state([](AtomicState& cur_state) { |
| DCHECK_EQ(cur_state.residency, NOT_RESIDENT); |
| cur_state.residency = RESIDENT; |
| }); |
| } |
| |
| uint16_t num_references() const { |
| return atomic_state_.load().ref_count; |
| } |
| |
| // The caller must be holding the mutex_. |
| void get_reference() { |
| modify_atomic_state([](AtomicState& cur_state) { |
      // Check whether the reference count would wrap. There is no use case that
      // currently needs to exceed 65k concurrent references, and exceeding that is
      // likely to be a bug in the caller's code. For now, this asserts, but there may
      // be a better behavior if needed.
| CHECK_LT(cur_state.ref_count, std::numeric_limits<uint16_t>::max()); |
| ++cur_state.ref_count; |
| }); |
| } |
| }; |
| |
| struct LIRSThreadState { |
| // Handles to evict (must already be in TOMBSTONE or UNINITIALIZED state), because |
| // eviction is done without holding a mutex. |
| vector<LIRSHandle*> handles_to_evict; |
| // Handles to free (must already be in the UNINITIALIZED state and removed from |
| // structures). |
| vector<LIRSHandle*> handles_to_free; |
| }; |
| |
| // Boost intrusive lists allow the handle to easily be on multiple lists simultaneously. |
| // Each hook is equivalent to a prev/next pointer. |
| typedef member_hook<LIRSHandle, list_member_hook<>, |
| &LIRSHandle::recency_list_hook_> RecencyListOption; |
| typedef boost::intrusive::list<LIRSHandle, RecencyListOption> RecencyList; |
| |
| typedef member_hook<LIRSHandle, list_member_hook<>, |
| &LIRSHandle::unprotected_tombstone_list_hook_> UnprotectedTombstoneListOption; |
| typedef boost::intrusive::list<LIRSHandle, |
| UnprotectedTombstoneListOption> UnprotectedTombstoneList; |
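
// For illustration only (hypothetical snippet, not used below): a single handle can
// be linked into both lists at once via its two hooks, and can be unlinked in O(1)
// without traversal:
//   RecencyList recency;
//   UnprotectedTombstoneList unprot;
//   LIRSHandle* h = ...;         // some allocated handle
//   recency.push_back(*h);       // links via recency_list_hook_
//   unprot.push_back(*h);        // links via unprotected_tombstone_list_hook_
//   if (h->recency_list_hook_.is_linked()) {
//     recency.erase(recency.iterator_to(*h));
//   }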
| |
// A single shard of the sharded cache.
| class LIRSCacheShard : public CacheShard { |
| public: |
| explicit LIRSCacheShard(kudu::MemTracker* tracker, size_t capacity); |
| ~LIRSCacheShard(); |
| |
| Status Init() override; |
| HandleBase* Allocate(Slice key, uint32_t hash, int val_len, size_t charge) override; |
| void Free(HandleBase* handle) override; |
| HandleBase* Insert(HandleBase* handle, |
| Cache::EvictionCallback* eviction_callback) override; |
| HandleBase* Lookup(const Slice& key, uint32_t hash, bool no_updates) override; |
| void UpdateCharge(HandleBase* handle, size_t charge) override; |
| void Release(HandleBase* handle) override; |
| void Erase(const Slice& key, uint32_t hash) override; |
| size_t Invalidate(const Cache::InvalidationControl& ctl) override; |
| vector<HandleBase*> Dump() override; |
| |
| private: |
| |
| // Update the memtracker's consumption by the given amount. |
| // |
| // This "buffers" the updates locally in 'deferred_consumption_' until the amount |
| // of accumulated delta is more than ~1% of the cache capacity. This improves |
| // performance under workloads with high eviction rates for a few reasons: |
| // |
| // 1) once the cache reaches its full capacity, we expect it to remain there |
| // in steady state. Each insertion is usually matched by an eviction, and unless |
| // the total size of the evicted item(s) is much different than the size of the |
| // inserted item, each eviction event is unlikely to change the total cache usage |
| // much. So, we expect that the accumulated error will mostly remain around 0 |
| // and we can avoid propagating changes to the MemTracker at all. |
| // |
| // 2) because the cache implementation is sharded, we do this tracking in a bunch |
| // of different locations, avoiding bouncing cache-lines between cores. By contrast |
| // the MemTracker is a simple integer, so it doesn't scale as well under concurrency. |
| // |
| // Positive delta indicates an increased memory consumption. |
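  //
  // As a hypothetical example: with capacity_ = 100 MB and
  // --cache_memtracker_approximation_ratio = 0.01, max_deferred_consumption_ is 1 MB,
  // so the shared mem_tracker_ is only touched once the locally accumulated delta
  // moves outside the range [-1 MB, +1 MB].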
| void UpdateMemTracker(int64_t delta); |
| |
| // MoveToRecencyListBack takes an entry and makes it the most recent entry in the |
| // recency list. |
| void MoveToRecencyListBack(LIRSThreadState* tstate, LIRSHandle* e, |
| bool expected_in_list=true); |
| |
| // TrimRecencyList maintains the invariant that the oldest element in the recency list |
| // is a protected entry. This iterates the recency list from oldest to newest, removing |
| // entries until the oldest entry is a PROTECTED entry. |
| void TrimRecencyList(LIRSThreadState* tstate); |
| |
  // Helper function for adding to the UNPROTECTED block in the
| // unprotected_tombstone_list_. This sets unprotected_list_front_ if this |
| // is the first UNPROTECTED entry in the list. This function relies on |
| // appropriate ordering of operations. List operations like this need to |
| // happen before updating counts/usage (e.g. num_unprotected_). |
| void AddToUnprotectedList(LIRSHandle* e); |
| |
| // Helper function for removing from the UNPROTECTED block in the |
| // unprotected_tombstone_list_. This detects whether this is the front of the |
| // unprotected block and maintains unprotected_list_front_ appropriately. |
| // This function relies on appropriate ordering of operations. List operations |
| // like this need to happen before updating counts/usage (e.g. num_unprotected_). |
| void RemoveFromUnprotectedList(LIRSHandle* e); |
| |
  // This iterates the tombstone block of the unprotected_tombstone_list_ from oldest
  // to newest, removing TOMBSTONE entries until the number of TOMBSTONE entries no
  // longer exceeds the limit specified by lirs_tombstone_multiple.
| void EnforceTombstoneLimit(LIRSThreadState* tstate); |
| |
| // This enforces the capacity constraint on the PROTECTED entries. As long as the |
| // protected usage exceeds the protected capacity, it removes the oldest PROTECTED |
| // entry from the recency list and transitions it to an UNPROTECTED entry. |
| void EnforceProtectedCapacity(LIRSThreadState* tstate); |
| |
| // This enforces the capacity constraint on the UNPROTECTED entries. As long as the |
| // unprotected usage exceeds the unprotected capacity, it removes the oldest |
| // UNPROTECTED entry from the unprotected list. The removed entry becomes |
| // a TOMBSTONE entry or an UNINITIALIZED entry. |
| void EnforceUnprotectedCapacity(LIRSThreadState* tstate); |
| |
| // Enter the cache |
| // UninitializedToProtected always succeeds |
| void UninitializedToProtected(LIRSThreadState* tstate, LIRSHandle* e); |
| // UninitializedToUnprotected can fail when the entry is too large. Returns whether the |
  // entry was successfully added.
| bool UninitializedToUnprotected(LIRSThreadState* tstate, LIRSHandle* e); |
| |
| // Bounce around the cache |
| void UnprotectedToProtected(LIRSThreadState* tstate, LIRSHandle* e); |
| void ProtectedToUnprotected(LIRSThreadState* tstate, LIRSHandle* e); |
| void UnprotectedToTombstone(LIRSThreadState* tstate, LIRSHandle* e); |
| |
| // Exit the cache |
| void ToUninitialized(LIRSThreadState* tstate, LIRSHandle* e, bool is_trim = false); |
| |
  // The functions above move evictions and frees outside the critical section for
  // mutex_ by placing the affected entries on the LIRSThreadState. This function
  // handles the accumulated evictions/frees. It should be called without holding any
  // locks.
| void CleanupThreadState(LIRSThreadState* tstate); |
| |
| bool initialized_ = false; |
| |
  // Capacity is split between protected and unprotected entries. Unprotected entries
  // make up a much smaller proportion of the cache than protected ones.
| // Note: updates to counts/usage happen after doing list operations. |
| size_t capacity_; |
| size_t unprotected_capacity_; |
| size_t unprotected_usage_ = 0; |
| size_t num_unprotected_ = 0; |
| size_t protected_capacity_; |
| size_t protected_usage_ = 0; |
| size_t num_protected_ = 0; |
| size_t num_tombstones_ = 0; |
| |
| // mutex_ protects the following state. |
| MutexType mutex_; |
| |
| // Recency list (called the LIRS stack in the paper) |
| RecencyList recency_list_; |
| |
| // This list is a combination of the unprotected list (called the HIR list in the |
| // paper) and the tombstone list (not in the paper). The tombstone list is used |
| // to efficiently implement the lirs_tombstone_multiple. Without a tombstone list, |
| // finding the oldest TOMBSTONE entry requires traversing the recency list, |
| // potentially passing a large number of entries before reaching a TOMBSTONE entry. |
| // The tombstone list makes this O(1). |
| // |
| // The combined list still behaves like two separate lists. In particular, the |
| // UNPROTECTED entries and TOMBSTONE entries are each contiguous blocks. |
| // The block of newer UNPROTECTED entries is at the back of the list. The block |
| // of oldest TOMBSTONE entries is at the front. The unprotected_list_front_ points |
| // to the UNPROTECTED entry that is at the block boundary. |
  // Visually, it is:
  // newest/end() |--- UNPROTECTED entries ---|--- TOMBSTONE entries ---| oldest/begin()
  //              ^                           ^                         ^
  //              back()                      unprotected_list_front_   front()
| // |
| // Combining the two lists reduces the cost of transferring an element from |
| // UNPROTECTED to TOMBSTONE, which is a common operation. To simplify operations on the |
| // UNPROTECTED block, see the two helper functions AddToUnprotectedList() and |
| // RemoveFromUnprotectedList(). |
| UnprotectedTombstoneList unprotected_tombstone_list_; |
| |
| // The front of the UNPROTECTED block in the unprotected_tombstone_list_. This is |
| // nullptr if there are no UNPROTECTED entries. |
| LIRSHandle* unprotected_list_front_ = nullptr; |
| |
| // Hash table that maps keys to handles. |
| HandleTable table_; |
| |
| kudu::MemTracker* mem_tracker_; |
| std::atomic<int64_t> deferred_consumption_ { 0 }; |
| |
| // Initialized based on capacity_ to ensure an upper bound on the error on the |
| // MemTracker consumption. |
| int64_t max_deferred_consumption_; |
| }; |
| |
| LIRSCacheShard::LIRSCacheShard(kudu::MemTracker* tracker, size_t capacity) |
| : capacity_(capacity), mem_tracker_(tracker) {} |
| |
| LIRSCacheShard::~LIRSCacheShard() { |
| LIRSThreadState tstate; |
| // Start with the recency list. |
| while (!recency_list_.empty()) { |
| LIRSHandle* e = &recency_list_.front(); |
| DCHECK_NE(e->state(), UNINITIALIZED); |
| DCHECK_EQ(e->num_references(), 0); |
| // This removes it from the recency list and the unprotected list (if appropriate) |
| ToUninitialized(&tstate, e); |
| } |
  // Anything remaining was not on the recency list. All TOMBSTONE entries are on the
  // recency list and were removed in the previous step, so only UNPROTECTED entries
  // remain.
| while (!unprotected_tombstone_list_.empty()) { |
| LIRSHandle* e = &*unprotected_tombstone_list_.begin(); |
| DCHECK_EQ(e->state(), UNPROTECTED); |
| DCHECK_EQ(e->num_references(), 0); |
| // This removes it from the unprotected list |
| ToUninitialized(&tstate, e); |
| } |
| DCHECK(unprotected_list_front_ == nullptr); |
| CleanupThreadState(&tstate); |
| // Eviction calls UpdateMemTracker() for each handle, but UpdateMemTracker() only |
| // updates the underlying mem_tracker_ if the deferred consumption exceeds the |
| // max deferred consumption. There can be deferred consumption left over here, |
| // so decrement any remaining deferred consumption. |
| mem_tracker_->Consume(deferred_consumption_); |
| } |
| |
| Status LIRSCacheShard::Init() { |
| // To save some implementation complexity, avoid dealing with unprotected |
| // percentage >= 50.00%. This is far outside the range of what is useful for |
| // LIRS. |
| if (!MathLimits<double>::IsFinite(FLAGS_lirs_unprotected_percentage) || |
| FLAGS_lirs_unprotected_percentage >= 50.0 || |
| FLAGS_lirs_unprotected_percentage < 0.0) { |
| return Status(Substitute("Misconfigured --lirs_unprotected_percentage: $0. " |
| "Must be between 0 and 50.", FLAGS_lirs_unprotected_percentage)); |
| } |
| if (!MathLimits<double>::IsFinite(FLAGS_cache_memtracker_approximation_ratio) || |
| FLAGS_cache_memtracker_approximation_ratio < 0.0 || |
| FLAGS_cache_memtracker_approximation_ratio > 1.0) { |
| return Status(Substitute("Misconfigured --cache_memtracker_approximation_ratio: $0. " |
| "Must be between 0 and 1.", FLAGS_cache_memtracker_approximation_ratio)); |
| } |
| if (!MathLimits<double>::IsFinite(FLAGS_lirs_tombstone_multiple) || |
| FLAGS_lirs_tombstone_multiple < 0.5) { |
| return Status(Substitute("Misconfigured --lirs_tombstone_multiple: $0. " |
| "Must be 0.5 or greater.", FLAGS_lirs_tombstone_multiple)); |
| } |
| unprotected_capacity_ = |
| static_cast<size_t>((capacity_ * FLAGS_lirs_unprotected_percentage) / 100.00); |
| protected_capacity_ = capacity_ - unprotected_capacity_; |
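  // For example (hypothetical numbers): a 100 MB shard with the default
  // --lirs_unprotected_percentage of 5.0 would get unprotected_capacity_ = 5 MB and
  // protected_capacity_ = 95 MB.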
| CHECK_GE(protected_capacity_, unprotected_capacity_); |
| max_deferred_consumption_ = capacity_ * FLAGS_cache_memtracker_approximation_ratio; |
| initialized_ = true; |
| return Status::OK(); |
| } |
| |
| void LIRSCacheShard::UpdateMemTracker(int64_t delta) { |
| int64_t old_deferred = deferred_consumption_.fetch_add(delta); |
| int64_t new_deferred = old_deferred + delta; |
| |
| if (new_deferred > max_deferred_consumption_ || |
| new_deferred < -max_deferred_consumption_) { |
| int64_t to_propagate = deferred_consumption_.exchange(0, std::memory_order_relaxed); |
| mem_tracker_->Consume(to_propagate); |
| } |
| } |
| |
| void LIRSCacheShard::EnforceProtectedCapacity(LIRSThreadState* tstate) { |
| while (protected_usage_ > protected_capacity_) { |
| DCHECK(!recency_list_.empty()); |
| // Get pointer to oldest entry and remove it |
| LIRSHandle* oldest = &recency_list_.front(); |
| // The oldest entry must be protected (i.e. the recency list must be trimmed) |
| DCHECK_EQ(oldest->state(), PROTECTED); |
| ProtectedToUnprotected(tstate, oldest); |
| } |
| } |
| |
| void LIRSCacheShard::TrimRecencyList(LIRSThreadState* tstate) { |
| // This function maintains the invariant that the oldest entry in the list must be |
| // a protected entry. Look at the oldest entry in the list (i.e. the front). If it is |
| // not protected, remove it from the list. If it is a tombstone entry, it needs to be |
| // deleted. Unprotected entries still exist in the unprotected list, so UNPROTECTED |
| // entries should only be removed from the recency list. |
| while (!recency_list_.empty() && recency_list_.front().state() != PROTECTED) { |
| LIRSHandle* oldest = &recency_list_.front(); |
| if (oldest->state() == TOMBSTONE) { |
| ToUninitialized(tstate, oldest, /* is_trim */ true); |
| } else { |
| DCHECK_EQ(oldest->state(), UNPROTECTED); |
| recency_list_.pop_front(); |
| } |
| } |
| } |
| |
| void LIRSCacheShard::EnforceTombstoneLimit(LIRSThreadState* tstate) { |
| // If there are a large number of entries that haven't been seen before, the number |
| // of tombstones can grow without bound. This enforces an upper bound on the total |
| // number of tombstones to limit the metadata size. This is defined as a multiple of |
| // the total number of entries in the cache. |
| // TODO: It would help performance to enforce this with some inexactness (i.e. batch |
| // the removals). |
| size_t num_elems = num_unprotected_ + num_protected_; |
| size_t tombstone_limit = |
| static_cast<size_t>(static_cast<double>(num_elems) * FLAGS_lirs_tombstone_multiple); |
| if (num_tombstones_ <= tombstone_limit) return; |
| // Remove the oldest TOMBSTONE entries from the front of unprotected_tombstone_list_ |
| // until we are back under the limit. |
| auto it = unprotected_tombstone_list_.begin(); |
| while (num_tombstones_ > tombstone_limit) { |
| LIRSHandle* e = &*it; |
| it = unprotected_tombstone_list_.erase(it); |
| DCHECK_EQ(e->state(), TOMBSTONE); |
| // This will remove the entry from the recency_list_. |
| ToUninitialized(tstate, e); |
| } |
| } |
| |
| void LIRSCacheShard::EnforceUnprotectedCapacity(LIRSThreadState* tstate) { |
| while (unprotected_usage_ > unprotected_capacity_) { |
| // Evict the oldest UNPROTECTED entry from the unprotected list. This transitions it |
| // from UNPROTECTED to a TOMBSTONE entry |
| DCHECK_GT(num_unprotected_, 0); |
| DCHECK(!unprotected_tombstone_list_.empty()); |
| DCHECK(unprotected_list_front_ != nullptr); |
| LIRSHandle* oldest = unprotected_list_front_; |
| if (oldest->recency_list_hook_.is_linked()) { |
| UnprotectedToTombstone(tstate, oldest); |
| } else { |
| // If the oldest entry is not on the recency list, then its reuse distance is |
| // longer than the oldest entry on the recency list. This entry should be deleted |
| // rather than becoming a TOMBSTONE. |
| ToUninitialized(tstate, oldest); |
| } |
| } |
| } |
| |
void LIRSCacheShard::MoveToRecencyListBack(LIRSThreadState* tstate, LIRSHandle* e,
| bool expected_in_list) { |
| // Remove and add to the back of the list as the most recent element |
| bool in_list = e->recency_list_hook_.is_linked(); |
| CHECK(!expected_in_list || in_list); |
| bool need_trim = false; |
| if (in_list) { |
| // Is this the oldest entry in the list (the front)? |
| LIRSHandle* oldest = &recency_list_.front(); |
| // Invariant: the oldest entry in the list is always a protected entry |
| CHECK_EQ(oldest->state(), PROTECTED); |
| if (oldest == e) { |
| need_trim = true; |
| } |
| recency_list_.erase(recency_list_.iterator_to(*e)); |
| } |
| recency_list_.push_back(*e); |
| if (need_trim) { |
| TrimRecencyList(tstate); |
| } |
| } |
| |
| void LIRSCacheShard::AddToUnprotectedList(LIRSHandle* e) { |
| DCHECK(!e->unprotected_tombstone_list_hook_.is_linked()); |
| DCHECK_EQ(e->state(), UNPROTECTED); |
| unprotected_tombstone_list_.push_back(*e); |
  // Updates to counts are always after list operations, so if num_unprotected_ is
  // still zero here, this entry is the first UNPROTECTED entry in the list.
| if (num_unprotected_ == 0) { |
| DCHECK(unprotected_list_front_ == nullptr); |
| unprotected_list_front_ = e; |
| } |
| } |
| |
| void LIRSCacheShard::RemoveFromUnprotectedList(LIRSHandle* e) { |
| DCHECK(e->unprotected_tombstone_list_hook_.is_linked()); |
| DCHECK_EQ(e->state(), UNPROTECTED); |
| DCHECK_GE(num_unprotected_, 1); |
| auto it = unprotected_tombstone_list_.iterator_to(*e); |
| it = unprotected_tombstone_list_.erase(it); |
| // If it was the front of the list, then unprotected_list_front_ needs to be |
| // updated. |
| if (e == unprotected_list_front_) { |
| // Updates to counts are always after list operations, so num_unprotected_ |
| // includes this entry. |
| if (num_unprotected_ > 1) { |
| // This was not the last unprotected entry |
| DCHECK(it != unprotected_tombstone_list_.end()); |
| unprotected_list_front_ = &*it; |
| } else { |
| // This was the last unprotected entry |
| DCHECK(it == unprotected_tombstone_list_.end()); |
| unprotected_list_front_ = nullptr; |
| } |
| } |
| } |
| |
void LIRSCacheShard::UnprotectedToProtected(LIRSThreadState* tstate, LIRSHandle* e) {
  // This only happens for entries that are on the recency list
| DCHECK(e->recency_list_hook_.is_linked()); |
| // Make this the most recent entry in the recency list and remove from the |
| // unprotected list. |
| MoveToRecencyListBack(tstate, e); |
| RemoveFromUnprotectedList(e); |
| e->set_state(UNPROTECTED, PROTECTED); |
| // List operations happen before changes to the counts/usage |
| --num_unprotected_; |
| ++num_protected_; |
| // Transfer the usage |
| unprotected_usage_ -= e->charge(); |
| protected_usage_ += e->charge(); |
| // If necessary, evict old entries from the recency list, protected to unprotected |
| EnforceProtectedCapacity(tstate); |
| } |
| |
| void LIRSCacheShard::ProtectedToUnprotected(LIRSThreadState* tstate, LIRSHandle* e) { |
| DCHECK(!e->unprotected_tombstone_list_hook_.is_linked()); |
| DCHECK(e->recency_list_hook_.is_linked()); |
| // Must be the oldest on the recency list |
| DCHECK(&recency_list_.front() == e); |
| recency_list_.erase(recency_list_.iterator_to(*e)); |
| // Trim the recency list after the removal |
| TrimRecencyList(tstate); |
| e->set_state(PROTECTED, UNPROTECTED); |
| // Add to unprotected list as the newest entry |
| AddToUnprotectedList(e); |
| // List operations happen before changes to the counts/usage |
| --num_protected_; |
| ++num_unprotected_; |
| // Transfer the usage |
| protected_usage_ -= e->charge(); |
| unprotected_usage_ += e->charge(); |
| // If necessary, evict an entry from the unprotected list |
| EnforceUnprotectedCapacity(tstate); |
| } |
| |
| void LIRSCacheShard::UnprotectedToTombstone(LIRSThreadState* tstate, LIRSHandle* e) { |
| // Decrement unprotected usage |
| DCHECK(e->unprotected_tombstone_list_hook_.is_linked()); |
| DCHECK(e->recency_list_hook_.is_linked()); |
| |
| // Optimized code to go from UNPROTECTED to TOMBSTONE without adding/removing |
| // it from a list. Only the unprotected_list_front_ moves. |
| DCHECK_EQ(e, unprotected_list_front_); |
| if (num_unprotected_ > 1) { |
| // There is some other UNPROTECTED entry |
| auto it = unprotected_tombstone_list_.iterator_to(*e); |
| ++it; |
| unprotected_list_front_ = &*it; |
| } else { |
| // This was the only UNPROTECTED entry, the unprotected list is empty |
| unprotected_list_front_ = nullptr; |
| } |
| |
| // Set state to TOMBSTONE. If there are currently no references, this thread is |
| // responsible for eviction, so it must also set the residency to EVICTING. |
| auto state_transition = e->modify_atomic_state([](AtomicState& cur_state) { |
| DCHECK_EQ(cur_state.state, UNPROTECTED); |
| DCHECK_EQ(cur_state.residency, RESIDENT); |
| cur_state.state = TOMBSTONE; |
| if (cur_state.ref_count == 0) cur_state.residency = EVICTING; |
| }); |
| // Look at the old state. If there are no references, then we can immediately evict. |
| if (state_transition.before.ref_count == 0) { |
| tstate->handles_to_evict.push_back(e); |
| } |
| // List operations happen before changes to the counts/usage |
| --num_unprotected_; |
| ++num_tombstones_; |
| |
| // This will be evicted, so the charge disappears |
| unprotected_usage_ -= e->charge(); |
| EnforceTombstoneLimit(tstate); |
| } |
| |
| void LIRSCacheShard::UninitializedToProtected(LIRSThreadState* tstate, LIRSHandle* e) { |
| DCHECK(!e->unprotected_tombstone_list_hook_.is_linked()); |
| DCHECK(!e->recency_list_hook_.is_linked()); |
| e->set_state(UNINITIALIZED, PROTECTED); |
| HandleBase* existing = table_.Insert(e); |
| DCHECK(existing == nullptr); |
| recency_list_.push_back(*e); |
| // List operations happen before changes to the counts/usage |
| ++num_protected_; |
| protected_usage_ += e->charge(); |
| EnforceProtectedCapacity(tstate); |
| } |
| |
| bool LIRSCacheShard::UninitializedToUnprotected(LIRSThreadState* tstate, LIRSHandle* e) { |
| DCHECK(!e->unprotected_tombstone_list_hook_.is_linked()); |
| DCHECK(!e->recency_list_hook_.is_linked()); |
| e->set_state(UNINITIALIZED, UNPROTECTED); |
| HandleBase* existing = table_.Insert(e); |
| DCHECK(existing == nullptr); |
| recency_list_.push_back(*e); |
| AddToUnprotectedList(e); |
| // List operations happen before changes to the counts/usage |
| ++num_unprotected_; |
| unprotected_usage_ += e->charge(); |
| // If necessary, evict an entry from the unprotected list |
| EnforceUnprotectedCapacity(tstate); |
| // There is exactly one failure case: |
| // If the new entry is larger than the unprotected capacity, then it will cause all |
| // unprotected entries to be removed (including itself). |
| if (e->charge() > unprotected_capacity_) { |
| DCHECK(e->state() == UNINITIALIZED || e->state() == TOMBSTONE); |
| DCHECK_EQ(num_unprotected_, 0); |
| return false; |
| } |
| // The entry remains in the cache |
| DCHECK_EQ(e->state(), UNPROTECTED); |
| DCHECK(!unprotected_tombstone_list_.empty()); |
| DCHECK(e->unprotected_tombstone_list_hook_.is_linked()); |
| return true; |
| } |
| |
| void LIRSCacheShard::ToUninitialized(LIRSThreadState* tstate, LIRSHandle* e, |
| bool is_trim) { |
| DCHECK_NE(e->state(), UNINITIALIZED); |
| LIRSHandle* removed_elem = static_cast<LIRSHandle*>(table_.Remove(e->key(), e->hash())); |
| DCHECK(e == removed_elem || removed_elem == nullptr); |
| // Remove from the list (if it is in the list) |
| if (e->recency_list_hook_.is_linked()) { |
    // If we are removing the oldest entry on the recency list, then we may need to
    // call TrimRecencyList to maintain our invariant that the oldest entry on the
    // recency list is a PROTECTED element. TrimRecencyList itself calls this function
    // while enforcing
| // the invariant, so this passes is_trim=true from TrimRecencyList to avoid the |
| // unnecessary recursion. |
| bool need_trim = false; |
| // Is this the oldest entry in the list (the front)? |
| LIRSHandle* oldest = &recency_list_.front(); |
    if (!is_trim && oldest == e) {
| DCHECK_EQ(e->state(), PROTECTED); |
| need_trim = true; |
| } |
| recency_list_.erase(recency_list_.iterator_to(*e)); |
| if (need_trim) { |
| TrimRecencyList(tstate); |
| } |
| } |
| if (e->state() == UNPROTECTED) { |
| if (e->unprotected_tombstone_list_hook_.is_linked()) { |
| // Remove from the unprotected list and decrement usage |
| RemoveFromUnprotectedList(e); |
| } |
| --num_unprotected_; |
| unprotected_usage_ -= e->charge(); |
| } else if (e->state() == TOMBSTONE) { |
| if (e->unprotected_tombstone_list_hook_.is_linked()) { |
| unprotected_tombstone_list_.erase(unprotected_tombstone_list_.iterator_to(*e)); |
| } |
| --num_tombstones_; |
| } else { |
| DCHECK(!e->unprotected_tombstone_list_hook_.is_linked()); |
| } |
| if (e->state() == PROTECTED) { |
| protected_usage_ -= e->charge(); |
| --num_protected_; |
| } |
| |
| auto state_transition = e->modify_atomic_state([](AtomicState& cur_state) { |
| cur_state.state = UNINITIALIZED; |
| // If it is not resident, there must be no references. |
| if (cur_state.residency == NOT_RESIDENT || cur_state.residency == EVICTING) { |
| DCHECK_EQ(cur_state.ref_count, 0); |
| } |
| // If this is resident without references, we can start eviction. |
| if (cur_state.residency == RESIDENT && cur_state.ref_count == 0) { |
| cur_state.residency = EVICTING; |
| } |
| }); |
| |
| if (state_transition.before.residency == RESIDENT && |
| state_transition.after.residency == EVICTING) { |
| // We did a transition from RESIDENT to EVICTING, so we are responsible for |
| // eviction. Since we know the new state is UNINITIALIZED, we are also responsible |
| // for freeing it, but the eviction code will handle that appropriately. |
| tstate->handles_to_evict.push_back(e); |
| } |
| |
| // This was already evicted, so now it just needs to be freed. |
| if (state_transition.before.residency == NOT_RESIDENT) { |
| tstate->handles_to_free.push_back(e); |
| } |
| } |
| |
| HandleBase* LIRSCacheShard::Allocate(Slice key, uint32_t hash, int val_len, |
| size_t charge) { |
| DCHECK(initialized_); |
| int key_len = key.size(); |
| DCHECK_GE(key_len, 0); |
| DCHECK_GE(val_len, 0); |
| DCHECK_GT(charge, 0); |
| if (charge == 0 || charge > protected_capacity_) { |
| return nullptr; |
| } |
| int key_len_padded = KUDU_ALIGN_UP(key_len, sizeof(void*)); |
| uint8_t* buf = new uint8_t[sizeof(LIRSHandle) |
| + key_len_padded + val_len]; // the kv_data VLA data |
| int calc_charge = |
| (charge == Cache::kAutomaticCharge) ? kudu::kudu_malloc_usable_size(buf) : charge; |
| uint8_t* kv_ptr = buf + sizeof(LIRSHandle); |
| LIRSHandle* handle = new (buf) LIRSHandle(kv_ptr, key, hash, val_len, |
| calc_charge); |
| return handle; |
| } |
| |
| void LIRSCacheShard::Free(HandleBase* handle) { |
| DCHECK(initialized_); |
| // We allocate the handle as a uint8_t array, then we call a placement new, |
| // which calls the constructor. For symmetry, we call the destructor and then |
| // delete on the uint8_t array. |
| LIRSHandle* h = static_cast<LIRSHandle*>(handle); |
| h->~LIRSHandle(); |
| uint8_t* data = reinterpret_cast<uint8_t*>(handle); |
| delete [] data; |
| } |
| |
| HandleBase* LIRSCacheShard::Lookup(const Slice& key, uint32_t hash, |
| bool no_updates) { |
| DCHECK(initialized_); |
| LIRSHandle* e; |
| LIRSThreadState tstate; |
| { |
| std::lock_guard<MutexType> l(mutex_); |
| e = static_cast<LIRSHandle*>(table_.Lookup(key, hash)); |
| if (e != nullptr) { |
| CHECK_NE(e->state(), UNINITIALIZED); |
| // If the handle is a TOMBSTONE, Lookup() should pretend the entry doesn't exist. |
| // TOMBSTONE has special treatment in Insert(); no other action is necessary here. |
| if (e->state() == TOMBSTONE) return nullptr; |
| e->get_reference(); |
      // If this is a no-update lookup, nothing needs to change. We can just return
      // the entry.
| if (no_updates) return e; |
| switch (e->state()) { |
| case UNINITIALIZED: |
| // Needed to keep clang-tidy happy |
| CHECK(false); |
| case PROTECTED: |
| // The PROTECTED entry is in the recency list, and the only action is to make |
| // it the most recent element in the list. This can result in evictions if it is |
| // currently the oldest entry. |
| MoveToRecencyListBack(&tstate, e); |
| break; |
| case UNPROTECTED: |
| if (e->recency_list_hook_.is_linked()) { |
| // If an UNPROTECTED entry is in the recency list when it is referenced, |
| // then we know that its new reuse distance is shorter than the last PROTECTED |
| // entry in the list. So, this entry is upgraded from UNPROTECTED to |
| // PROTECTED. |
| UnprotectedToProtected(&tstate, e); |
| } else { |
| // If an UNPROTECTED entry is not on the recency list, then its new reuse |
| // distance is longer than any of the PROTECTED entries. So, it remains |
| // an UNPROTECTED entry, but it should be added back to the recency list and |
| // readded as the most recent entry on the unprotected list. |
| DCHECK_GT(num_unprotected_, 0); |
| MoveToRecencyListBack(&tstate, e, false); |
| if (num_unprotected_ != 1) { |
| RemoveFromUnprotectedList(e); |
| AddToUnprotectedList(e); |
| } |
| } |
| break; |
| default: |
| CHECK(false) << "Unexpected state for Lookup: " << e->state(); |
| } |
| } |
| } |
| |
  // This is done without holding the mutex, for performance reasons.
| CleanupThreadState(&tstate); |
| |
| return e; |
| } |
| |
| void LIRSCacheShard::UpdateCharge(HandleBase* handle, size_t charge) { |
| DCHECK(initialized_); |
| LIRSThreadState tstate; |
| { |
| // Hold the lock to avoid concurrent evictions |
| std::lock_guard<MutexType> l(mutex_); |
| LIRSHandle* e = static_cast<LIRSHandle*>(handle); |
| // Entries that are UNINITIALIZED or TOMBSTONE do not count towards |
| // the usage and will be destroyed without interacting with the usage. |
| // Skip modifying the charge. |
| // |
| // In the case of UNINITIALIZED, we know that the caller has a handle |
| // for the entry, so it needs to have been in the cache previously |
| // (i.e. it was found via Lookup() or added via Insert()). So, in this |
| // context, UNINITIALIZED can only mean that it has been evicted. |
| if (e->state() == UNINITIALIZED || e->state() == TOMBSTONE) { |
| return; |
| } |
| DCHECK(e->state() == PROTECTED || e->state() == UNPROTECTED); |
| int64_t delta = charge - handle->charge(); |
| handle->set_charge(charge); |
| UpdateMemTracker(delta); |
| // Update usage in existing state, then evict as needed. |
| switch (e->state()) { |
| case PROTECTED: |
| protected_usage_ += delta; |
| EnforceProtectedCapacity(&tstate); |
| break; |
| case UNPROTECTED: |
| unprotected_usage_ += delta; |
| EnforceUnprotectedCapacity(&tstate); |
| break; |
| default: |
| // This can't happen. |
| CHECK(false) << "Unexpected state for UpdateCharge: " << e->state(); |
| break; |
| } |
| } |
| CleanupThreadState(&tstate); |
| } |
| |
| void LIRSCacheShard::Release(HandleBase* handle) { |
| DCHECK(initialized_); |
| LIRSHandle* e = static_cast<LIRSHandle*>(handle); |
| LIRSThreadState tstate; |
| |
| auto state_transition = e->modify_atomic_state([] (AtomicState& cur_state) { |
| DCHECK_GT(cur_state.ref_count, 0); |
| --cur_state.ref_count; |
| if (cur_state.ref_count == 0) { |
| if (cur_state.state == TOMBSTONE || cur_state.state == UNINITIALIZED) { |
| // An entry cannot be evicted until the reference count is zero, so this |
| // must still be resident. In this circumstance, this thread must do the |
| // eviction. |
| DCHECK_EQ(cur_state.residency, RESIDENT); |
| cur_state.residency = EVICTING; |
| } |
| } |
| }); |
| // If we set the residency to EVICTING, then do the eviction. If this is UNINITIALIZED, |
| // this will also free it. |
| if (state_transition.after.residency == EVICTING) { |
| tstate.handles_to_evict.push_back(e); |
| CleanupThreadState(&tstate); |
| } |
| } |
| |
| HandleBase* LIRSCacheShard::Insert(HandleBase* e_in, |
| Cache::EvictionCallback *eviction_callback) { |
| DCHECK(initialized_); |
| LIRSHandle* e = static_cast<LIRSHandle*>(e_in); |
| CHECK_EQ(e->state(), UNINITIALIZED); |
| // Set the remaining LIRSHandle members which were not already set in the constructor. |
| e->eviction_callback_ = eviction_callback; |
| // The caller has already stored the value in the handle (and any underlying storage). |
| // Whether the Insert() is ultimately successful or not, this handle is currently |
| // resident. |
| e->set_resident(); |
| |
| // Insert() can fail. A failed insert is equivalent to a successful insert followed |
| // by an immediate eviction of that entry, so much of the setup code is identical. |
| bool success = true; |
| LIRSThreadState tstate; |
| { |
| std::lock_guard<MutexType> l(mutex_); |
| |
| // Cases: |
| // 1. There is no existing entry. |
| // 2. There is a tombstone entry. |
| // 3. There is a non-tombstone entry (rare) |
| // In any case, the existing entry will be replaced, so go ahead and remove it. |
| LIRSHandle* existing_entry = |
| static_cast<LIRSHandle*>(table_.Remove(e->key(), e->hash())); |
| |
| // The entry is always resident at the start of Insert(), so incorporate this |
| // entry's charge into the memtracker. If Insert() fails, this is decremented as |
| // part of eviction. |
| UpdateMemTracker(e->charge()); |
| // All of these paths can result in evictions |
| if (existing_entry != nullptr) { |
      // Priorities are modified in Lookup(), but not in Insert(). The exception is
      // TOMBSTONE entries, which are not impacted by Lookup().
| // So: |
| // 1. TOMBSTONE becomes PROTECTED. |
| // 2. PROTECTED remains PROTECTED. |
| // 3. UNPROTECTED remains UNPROTECTED. |
| LIRSState existing_state = existing_entry->state(); |
| ToUninitialized(&tstate, existing_entry); |
| if (existing_state == PROTECTED || existing_state == TOMBSTONE) { |
| UninitializedToProtected(&tstate, e); |
| } else { |
| DCHECK_EQ(existing_state, UNPROTECTED); |
| // UninitializedToUnprotected can fail (i.e. the entry inserted may not remain |
| // in the cache). |
| success = UninitializedToUnprotected(&tstate, e); |
| } |
| } else if (protected_usage_ + e->charge() <= protected_capacity_) { |
| // There is available space in the protected area, so add it directly there. |
| UninitializedToProtected(&tstate, e); |
| } else { |
| // UninitializedToUnprotected can fail (i.e. the entry inserted may not remain in |
| // the cache). |
| success = UninitializedToUnprotected(&tstate, e); |
| } |
| // If success=false, the entry is scheduled for eviction and won't be returned. |
| if (success) { |
| // Increment reference, since this will be returned. |
| e->get_reference(); |
| } |
| } |
| |
  // This is done without holding the mutex, for performance reasons.
| CleanupThreadState(&tstate); |
| |
| return (success ? e : nullptr); |
| } |
| |
| void LIRSCacheShard::CleanupThreadState(LIRSThreadState* tstate) { |
  // Evict the entries that were queued for eviction.
| for (LIRSHandle* e : tstate->handles_to_evict) { |
| DCHECK_EQ(e->residency(), EVICTING); |
| DCHECK_EQ(e->num_references(), 0); |
| if (e->eviction_callback_) { |
| e->eviction_callback_->EvictedEntry(e->key(), e->value()); |
| } |
| UpdateMemTracker(-static_cast<int64_t>(e->charge())); |
| // The eviction is done, set the residency back to NOT_RESIDENT |
| auto state_transition = e->modify_atomic_state([] (AtomicState& cur_state) { |
| DCHECK_EQ(cur_state.residency, EVICTING); |
| DCHECK_EQ(cur_state.ref_count, 0); |
| cur_state.residency = NOT_RESIDENT; |
| }); |
| // If the handle transitioned to UNINITIALIZED before we set it to NOT_RESIDENT, |
| // then we are responsible for freeing it. |
| if (state_transition.before.state == UNINITIALIZED) { |
| Free(e); |
| } |
| } |
| tstate->handles_to_evict.clear(); |
| for (LIRSHandle* e : tstate->handles_to_free) { |
| Free(e); |
| } |
| tstate->handles_to_free.clear(); |
| } |
| |
| void LIRSCacheShard::Erase(const Slice& key, uint32_t hash) { |
| DCHECK(initialized_); |
| LIRSThreadState tstate; |
| { |
| std::lock_guard<MutexType> l(mutex_); |
| LIRSHandle* e = static_cast<LIRSHandle*>(table_.Remove(key, hash)); |
| if (e != nullptr) { |
| // Transition from any state to UNINITIALIZED. There may be external references, so |
| // this might not actually free it immediately. |
| ToUninitialized(&tstate, e); |
| } |
| } |
| |
  // This is done without holding the mutex, for performance reasons.
| CleanupThreadState(&tstate); |
| } |
| |
| size_t LIRSCacheShard::Invalidate(const Cache::InvalidationControl& ctl) { |
| DCHECK(initialized_); |
| DCHECK(false) << "Invalidate() is not implemented for LIRS"; |
| return 0; |
| } |
| |
| vector<HandleBase*> LIRSCacheShard::Dump() { |
| DCHECK(initialized_); |
| std::lock_guard<MutexType> l(mutex_); |
| |
  // For the LIRS cache, we only collect resident entries (i.e. PROTECTED/UNPROTECTED
  // entries) and ignore entries that are not resident (i.e. TOMBSTONE entries).
| vector<HandleBase*> handles; |
| |
| for (LIRSHandle& h : recency_list_) { |
| // First walk through 'recency_list_', only collecting PROTECTED entries, ignoring |
| // the UNPROTECTED/TOMBSTONE entries. UNPROTECTED entries will be collected later from |
| // the unprotected_tombstone_list_. |
| if (h.state() == PROTECTED) { |
| h.get_reference(); |
| handles.push_back(&h); |
| } |
| } |
| |
| if (unprotected_list_front_ != nullptr) { |
| DCHECK(unprotected_list_front_->unprotected_tombstone_list_hook_.is_linked()); |
    // Everything from 'unprotected_list_front_' to the end of
    // 'unprotected_tombstone_list_' is an UNPROTECTED entry; collect them all.
| auto iter = unprotected_tombstone_list_.iterator_to(*unprotected_list_front_); |
| while (iter != unprotected_tombstone_list_.end()) { |
| iter->get_reference(); |
| handles.push_back(&*iter); |
| ++iter; |
| } |
| } |
| |
| return handles; |
| } |
| |
| } // end anonymous namespace |
| |
| template<> |
| CacheShard* NewCacheShardInt<Cache::EvictionPolicy::LIRS>(kudu::MemTracker* mem_tracker, |
| size_t capacity) { |
| return new LIRSCacheShard(mem_tracker, capacity); |
| } |
| |
| } // namespace impala |