| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // A Cache is an interface that maps keys to values. It has internal |
| // synchronization and may be safely accessed concurrently from |
| // multiple threads. It may automatically evict entries to make room |
| // for new entries. Values have a specified charge against the cache |
| // capacity. For example, a cache where the values are |
| // variable-length strings may use the length of the string as the |
| // charge for the string. |
| // |
| // This is taken from LevelDB and evolved to fit the kudu codebase. |
| // |
| // TODO(unknown): this is pretty lock-heavy. Would be good to sub out something |
| // a little more concurrent. |
| |
| #pragma once |
| |
| #include <cstddef> |
| #include <cstdint> |
| #include <functional> |
| #include <iosfwd> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| |
| #include "kudu/gutil/macros.h" |
| #include "kudu/util/slice.h" |
| |
| namespace kudu { |
| |
| struct CacheMetrics; |
| |
| class Cache { |
| public: |
| // Type of memory backing the cache's storage. |
| enum class MemoryType { |
| DRAM, |
| NVM, |
| }; |
| |
| // Supported eviction policies for the cache. The eviction policy determines |
| // which items to evict when the cache is at capacity and needs to |
| // accommodate a new item. |
| enum class EvictionPolicy { |
| // The earliest added items are evicted (a.k.a. queue). |
| FIFO, |
| |
| // The least-recently-used items are evicted. |
| LRU, |
| }; |
| |
| // Callback interface which is called when an entry is evicted from the |
| // cache. |
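| // |
| // For example, a minimal callback that releases per-entry resources might |
| // look like the following (a sketch; 'MyEvictionCallback' is illustrative |
| // only): |
| // |
| //   class MyEvictionCallback : public Cache::EvictionCallback { |
| //    public: |
| //     void EvictedEntry(Slice key, Slice value) override { |
| //       // ... release any resources associated with 'key'/'value' ... |
| //     } |
| //   }; |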
| class EvictionCallback { |
| public: |
| virtual void EvictedEntry(Slice key, Slice value) = 0; |
| virtual ~EvictionCallback() = default; |
| }; |
| |
| Cache() = default; |
| |
| // Destroys all existing entries, invoking the eviction callback (if any) |
| // that was registered for each entry when it was inserted. |
| virtual ~Cache(); |
| |
| // The behavior of SetMetrics() when the cache's metrics have already been set. |
| enum class ExistingMetricsPolicy { |
| // Calling SetMetrics() again will be a no-op. This is appropriate in tests |
| // that use a singleton cache that is shared across multiple daemons in the |
| // same process, at the cost of not having accurate cache metrics. This is |
| // useful for avoiding races between the destruction of existing metrics |
| // and the setting of new metrics in new daemons. It is expected that this |
| // is only used in tests. |
| kKeep, |
| |
| // SetMetrics() will overwrite the existing metrics. It is up to callers to |
| // ensure this is safe, e.g. by destructing the entity that owned the |
| // original metrics. |
| kReset, |
| }; |
| // Set the cache's metrics so the cache updates the corresponding counters |
| // as it operates. 'metrics_policy' controls the behavior when metrics have |
| // already been set (see ExistingMetricsPolicy above). |
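| // |
| // For example (a sketch; this assumes 'metrics' is a |
| // std::unique_ptr<CacheMetrics> instance created elsewhere): |
| // |
| //   cache->SetMetrics(std::move(metrics), |
| //                     Cache::ExistingMetricsPolicy::kReset); |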
| virtual void SetMetrics(std::unique_ptr<CacheMetrics> metrics, |
| ExistingMetricsPolicy metrics_policy) = 0; |
| |
| // Opaque handle to an entry stored in the cache. |
| struct Handle { }; |
| |
| // Custom deleter: intended for use with std::unique_ptr<Handle>. |
| class HandleDeleter { |
| public: |
| explicit HandleDeleter(Cache* c) |
| : c_(c) { |
| } |
| |
| void operator()(Cache::Handle* h) const { |
| if (h != nullptr) { |
| c_->Release(h); |
| } |
| } |
| |
| Cache* cache() const { |
| return c_; |
| } |
| |
| private: |
| Cache* c_; |
| }; |
| |
| // UniqueHandle -- a wrapper around the opaque Handle structure to facilitate |
| // automatic reference counting of the cache's handles. |
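| // |
| // For example, a handle obtained from Lookup() can be released early, before |
| // it goes out of scope, by resetting the wrapper (a sketch; 'key' is |
| // illustrative only): |
| // |
| //   Cache::UniqueHandle h(cache->Lookup(key, Cache::EXPECT_IN_CACHE)); |
| //   ... |
| //   h.reset();  // the handle is released back to the cache here |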
| typedef std::unique_ptr<Handle, HandleDeleter> UniqueHandle; |
| |
| // Opaque handle to an entry which is being prepared to be added to the cache. |
| struct PendingHandle { }; |
| |
| // Custom deleter: intended for use with std::unique_ptr<PendingHandle>. |
| class PendingHandleDeleter { |
| public: |
| explicit PendingHandleDeleter(Cache* c) |
| : c_(c) { |
| } |
| |
| void operator()(Cache::PendingHandle* h) const { |
| if (h != nullptr) { |
| c_->Free(h); |
| } |
| } |
| |
| Cache* cache() const { |
| return c_; |
| } |
| |
| private: |
| Cache* c_; |
| }; |
| |
| // UniquePendingHandle -- a wrapper around the opaque PendingHandle structure |
| // to facilitate automatic reference counting of newly allocated cache handles. |
| typedef std::unique_ptr<PendingHandle, PendingHandleDeleter> UniquePendingHandle; |
| |
| // Passing EXPECT_IN_CACHE will increment the hit/miss metrics that track the number of |
| // times a block was requested with the expectation of finding it in the cache, in |
| // addition to the basic metrics. |
| // Passing NO_EXPECT_IN_CACHE will only increment the basic metrics. |
| // This helps in determining whether we are effectively caching the blocks that matter most. |
| enum CacheBehavior { |
| EXPECT_IN_CACHE, |
| NO_EXPECT_IN_CACHE |
| }; |
| |
| // If the cache has no mapping for "key", returns a null handle. |
| // |
| // Else return a handle that corresponds to the mapping. |
| // |
| // Sample usage: |
| // |
| // unique_ptr<Cache> cache(NewCache(...)); |
| // ... |
| // { |
| // Cache::UniqueHandle h(cache->Lookup(...)); |
| // ... |
| // } // 'h' is automatically released here |
| // |
| // Or: |
| // |
| // unique_ptr<Cache> cache(NewCache(...)); |
| // ... |
| // { |
| // auto h(cache->Lookup(...)); |
| // ... |
| // } // 'h' is automatically released here |
| // |
| virtual UniqueHandle Lookup(const Slice& key, CacheBehavior caching) = 0; |
| |
| // If the cache contains an entry for "key", erase it. Note that the |
| // underlying entry will be kept around until all existing handles |
| // to it have been released. |
| virtual void Erase(const Slice& key) = 0; |
| |
| // Return the value encapsulated in the handle returned by a successful |
| // Lookup(). |
| virtual Slice Value(const UniqueHandle& handle) const = 0; |
| |
| |
| // ------------------------------------------------------------ |
| // Insertion path |
| // ------------------------------------------------------------ |
| // |
| // Because some cache implementations (eg NVM) manage their own memory, and because we'd |
| // like to read blocks directly into cache-managed memory rather than causing an extra |
| // memcpy, the insertion of a new element into the cache requires two phases. First, a |
| // PendingHandle is allocated with space for the value, and then it is later inserted. |
| // |
| // For example: |
| // |
| // auto ph(cache_->Allocate("my entry", value_size, charge)); |
| // if (!ReadDataFromDisk(cache_->MutableValue(&ph)).ok()) { |
| // ... error handling ... |
| // return; |
| // } |
| // UniqueHandle h(cache_->Insert(std::move(ph), my_eviction_callback)); |
| // ... |
| // // 'h' is automatically released. |
| |
| // Indicates that the charge of an item in the cache should be calculated |
| // based on its memory consumption. |
| static constexpr int kAutomaticCharge = -1; |
| |
| // Allocate space for a new entry to be inserted into the cache. |
| // |
| // The provided 'key' is copied into the resulting handle object. |
| // The allocated handle has enough space such that the value can |
| // be written into cache_->MutableValue(&handle). |
| // |
| // If 'charge' is not 'kAutomaticCharge', then the cache capacity will be charged |
| // the explicit amount. This is useful when caching items that are small but need to |
| // maintain a bounded count (eg file descriptors) rather than caring about their actual |
| // memory usage. It is also useful when caching items for which calculating |
| // memory usage is a complex affair (e.g. items containing pointers to |
| // additional heap allocations). |
| // |
| // Note that this does not mutate the cache itself: lookups will |
| // not be able to find the provided key until it is inserted. |
| // |
| // It is possible that this will return a nullptr wrapped in a std::unique_ptr |
| // if the cache is above its capacity and eviction fails to free up enough |
| // space for the requested allocation. |
| // |
| // The returned handle owns the allocated memory. |
| virtual UniquePendingHandle Allocate(Slice key, int val_len, int charge) = 0; |
| |
| // Convenience overload that uses kAutomaticCharge for 'charge' |
| // (default arguments on virtual functions are prohibited). |
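| // |
| // For example (a sketch; 'value_size' is illustrative only): |
| // |
| //   // The charge is computed from the entry's memory consumption. |
| //   auto ph = cache_->Allocate("my entry", value_size); |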
| UniquePendingHandle Allocate(Slice key, int val_len) { |
| return Allocate(key, val_len, kAutomaticCharge); |
| } |
| |
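| // Return a pointer to the buffer into which the value of a pending entry |
| // should be written before the entry is inserted via Insert(). The 'handle' |
| // must have been allocated by a prior call to Allocate(). |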
| virtual uint8_t* MutableValue(UniquePendingHandle* handle) = 0; |
| |
| // Commit a prepared entry into the cache. |
| // |
| // Returns a handle that corresponds to the mapping. This method always |
| // succeeds and returns a non-null handle, since the space was reserved |
| // by the earlier call to Allocate(). |
| // |
| // The 'pending' entry passed here should have been allocated using |
| // Cache::Allocate() above. |
| // |
| // If 'eviction_callback' is non-NULL, then it will be called when the |
| // entry is later evicted or when the cache shuts down. |
| virtual UniqueHandle Insert(UniquePendingHandle pending, |
| EvictionCallback* eviction_callback) = 0; |
| |
| // Forward declaration to simplify the layout of types/typedefs needed for the |
| // Invalidate() method while trying to adhere to the code style guide. |
| struct InvalidationControl; |
| |
| // Invalidate the cache's entries, evicting those that are not valid. |
| // The invalidation process iterates over the cache's recency list(s), |
| // from the best candidate for eviction to the worst. |
| // |
| // The provided control structure 'ctl' is responsible for the following: |
| // * determining whether an entry is valid |
| // * determining how to iterate over the entries in the cache's recency list |
| // |
| // NOTE: The invalidation process might hold a lock while iterating over |
| // the cache's entries. Using a proper IterationFunc might help to reduce |
| // contention with concurrent requests for the cache's contents. |
| // See the in-line documentation for IterationFunc for more details. |
| virtual size_t Invalidate(const InvalidationControl& ctl) = 0; |
| |
| // Functor defining a criterion for a cache entry's validity. When the |
| // Cache::Invalidate() method is called, the cache evicts an entry if the |
| // functor returns 'false' for its key and value; otherwise the entry stays |
| // in the cache. |
| typedef std::function<bool(Slice /* key */, |
| Slice /* value */)> |
| ValidityFunc; |
| |
| // Functor defining whether to continue or stop iterating over the cache's |
| // entries based on the number of valid and invalid entries encountered |
| // during the Cache::Invalidate() call. If a cache contains multiple |
| // sub-caches (e.g., shards), those counts are per sub-cache. For example, |
| // in the case of a multi-shard cache, when the 'iteration_func' returns |
| // 'false', invalidation at the current shard stops and switches to the |
| // next not-yet-processed shard, if any is present. |
| // |
| // The signature of the iteration functor is chosen to allow for effective |
| // purging of invalid (e.g., expired) entries in caches with |
| // the FIFO eviction policy (e.g., TTL caches). |
| // |
| // The first parameter of the functor is useful for short-circuiting |
| // the invalidation process once some valid entries have been encountered. |
| // For example, this is useful when the recency list keeps its entries in |
| // FIFO-like order (e.g., a TTL cache with the FIFO eviction policy), so |
| // the most-likely-invalid entries are at the very beginning of the list. |
| // In that case, once a valid (e.g., not yet expired) entry is |
| // encountered, there is no need to iterate any further: all the entries past |
| // the first valid one in the recency list should be valid as well. |
| // |
| // The second parameter is useful when the validity criterion is fuzzy, |
| // but there is a target number of entries to invalidate during each |
| // invocation of the Invalidate() method or there is some logic that reads |
| // the cache's metric(s) once the given number of entries have been evicted: |
| // e.g., comparing the resulting memory footprint of the cache against a |
| // threshold to decide whether to continue invalidating entries. |
| // |
| // Summing both parameters of the functor is useful when it's necessary to |
| // limit the number of entries processed per invocation of the Invalidate() |
| // method. That makes sense when a 'lazy' invalidation process is run by a |
| // periodic task alongside a significant amount of concurrent requests to |
| // the cache, and the number of entries in the cache is huge. Since in most |
| // cases it's necessary to guard access to the cache's recency list while |
| // iterating over its entries, limiting the number of entries to process at |
| // once allows for better control over the duration of the guarded/locked |
| // sections. |
| typedef std::function<bool(size_t /* valid_entries_num */, |
| size_t /* invalid_entries_num */)> |
| IterationFunc; |
| |
| // A helper function for 'validity_func' of the Invalidate() method: |
| // invalidate all entries. |
| static const ValidityFunc kInvalidateAllEntriesFunc; |
| |
| // A helper function for 'iteration_func' of the Invalidate() method: |
| // examine all entries. |
| static const IterationFunc kIterateOverAllEntriesFunc; |
| |
| // Control structure for the Invalidate() method. Combines the validity |
| // and the iteration functors. |
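| // |
| // For example, to evict the expired entries of a hypothetical TTL cache, |
| // examining at most 100 entries of a recency list per invocation (a sketch; |
| // the 'IsEntryExpired' predicate is illustrative only): |
| // |
| //   Cache::InvalidationControl ctl( |
| //       [](Slice /* key */, Slice value) { |
| //         // Returning 'true' means the entry is still valid. |
| //         return !IsEntryExpired(value); |
| //       }, |
| //       [](size_t valid, size_t invalid) { |
| //         // Stop iterating once 100 entries have been examined. |
| //         return valid + invalid < 100; |
| //       }); |
| //   size_t num_evicted = cache->Invalidate(ctl); |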
| struct InvalidationControl { |
| // NOLINTNEXTLINE(google-explicit-constructor) |
| InvalidationControl(ValidityFunc vfunctor = kInvalidateAllEntriesFunc, |
| IterationFunc ifunctor = kIterateOverAllEntriesFunc) |
| : validity_func(std::move(vfunctor)), |
| iteration_func(std::move(ifunctor)) { |
| } |
| const ValidityFunc validity_func; |
| const IterationFunc iteration_func; |
| }; |
| |
| protected: |
| // Release a mapping returned by a previous Lookup(), given its raw handle. |
| // REQUIRES: handle must not have been released yet. |
| // REQUIRES: handle must have been returned by a method on *this. |
| virtual void Release(Handle* handle) = 0; |
| |
| // Free 'ptr', which must have been previously allocated using 'Allocate'. |
| virtual void Free(PendingHandle* ptr) = 0; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(Cache); |
| }; |
| |
| // A template helper function to instantiate a cache of a particular |
| // 'eviction_policy' flavor, backed by the given storage 'mem_type', |
| // where 'capacity' specifies the capacity of the resulting cache, |
| // and 'id' specifies its identifier. |
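| // |
| // For example (a sketch; the capacity and identifiers below are |
| // illustrative only): |
| // |
| //   // LRU cache backed by DRAM (the default policy and memory type). |
| //   std::unique_ptr<Cache> cache(NewCache(64 * 1024 * 1024, "block cache")); |
| // |
| //   // FIFO cache backed by DRAM. |
| //   std::unique_ptr<Cache> fifo_cache( |
| //       NewCache<Cache::EvictionPolicy::FIFO>(64 * 1024 * 1024, "ttl cache")); |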
| template<Cache::EvictionPolicy eviction_policy = Cache::EvictionPolicy::LRU, |
| Cache::MemoryType mem_type = Cache::MemoryType::DRAM> |
| Cache* NewCache(size_t capacity, const std::string& id); |
| |
| // Create a new FIFO cache with a fixed capacity. This implementation |
| // of Cache uses the first-in-first-out eviction policy and is stored in DRAM. |
| template<> |
| Cache* NewCache<Cache::EvictionPolicy::FIFO, |
| Cache::MemoryType::DRAM>(size_t capacity, const std::string& id); |
| |
| // Create a new LRU cache with a fixed capacity. This implementation |
| // of Cache uses the least-recently-used eviction policy and is stored in DRAM. |
| template<> |
| Cache* NewCache<Cache::EvictionPolicy::LRU, |
| Cache::MemoryType::DRAM>(size_t capacity, const std::string& id); |
| |
| // A helper function to output the cache memory type into an ostream. |
| std::ostream& operator<<(std::ostream& os, Cache::MemoryType mem_type); |
| |
| } // namespace kudu |