blob: 5571ba2e5def89b19f045e5be5dfee4411d421ed [file]
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A Cache is an interface that maps keys to values. It has internal
// synchronization and may be safely accessed concurrently from
// multiple threads. It may automatically evict entries to make room
// for new entries. Values have a specified charge against the cache
// capacity. For example, a cache where the values are variable
// length strings, may use the length of the string as the charge for
// the string.
//
// This is taken from LevelDB and evolved to fit the kudu codebase.
//
// TODO(unknown): this is pretty lock-heavy. Would be good to sub out something
// a little more concurrent.
#pragma once
#include <boost/algorithm/string.hpp>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include "kudu/gutil/macros.h"
#include "kudu/util/slice.h"
#include "common/status.h"
using kudu::Slice;
namespace impala {
class Cache {
public:
// Supported eviction policies for the cache. Eviction policy determines what
// items to evict if the cache is at capacity when trying to accommodate
// an extra item.
enum class EvictionPolicy {
// The earliest added items are evicted (a.k.a. queue).
FIFO,
// The least-recently-used items are evicted.
LRU,
// LIRS (Low Inter-reference Recency Set)
LIRS,
};
static EvictionPolicy ParseEvictionPolicy(const std::string& policy_string) {
string upper_policy = boost::to_upper_copy(policy_string);
if (upper_policy == "LRU") {
return Cache::EvictionPolicy::LRU;
} else if (upper_policy == "LIRS") {
return Cache::EvictionPolicy::LIRS;
} else if (upper_policy == "FIFO") {
return Cache::EvictionPolicy::FIFO;
}
LOG(FATAL) << "Unsupported eviction policy: " << policy_string;
return Cache::EvictionPolicy::LRU;
}
// Callback interface which is called when an entry is evicted from the
// cache.
class EvictionCallback {
public:
virtual void EvictedEntry(Slice key, Slice value) = 0;
virtual ~EvictionCallback() = default;
};
Cache() = default;
// Destroys all existing entries by calling the "deleter"
// function that was passed to the constructor.
virtual ~Cache();
// Initialization function for the cache. This must be called before using
// any other methods. It returns error status if there is any issue with
// the cache or its parameters.
virtual Status Init() = 0;
// Opaque handle to an entry stored in the cache.
struct Handle { };
// Custom deleter: intended for use with std::unique_ptr<Handle>.
class HandleDeleter {
public:
explicit HandleDeleter(Cache* c)
: c_(c) {
}
// It is useful to have a constructor without arguments, as
// this makes it easier to embed in containers.
explicit HandleDeleter()
: c_(nullptr) {}
void operator()(Cache::Handle* h) const {
if (h != nullptr) {
DCHECK(c_ != nullptr);
c_->Release(h);
}
}
Cache* cache() const {
return c_;
}
private:
Cache* c_;
};
// UniqueHandle -- a wrapper around opaque Handle structure to facilitate
// automatic reference counting of cache's handles.
typedef std::unique_ptr<Handle, HandleDeleter> UniqueHandle;
// Opaque handle to an entry which is being prepared to be added to the cache.
struct PendingHandle { };
// Custom deleter: intended for use with std::unique_ptr<PendingHandle>.
class PendingHandleDeleter {
public:
explicit PendingHandleDeleter(Cache* c)
: c_(c) {
}
// It is useful to have a constructor without arguments, as
// this makes it easier to embed in containers.
explicit PendingHandleDeleter()
: c_(nullptr) {}
void operator()(Cache::PendingHandle* h) const {
if (h != nullptr) {
DCHECK(c_ != nullptr);
c_->Free(h);
}
}
Cache* cache() const {
return c_;
}
private:
Cache* c_;
};
// UniquePendingHandle -- a wrapper around opaque PendingHandle structure
// to facilitate automatic reference counting newly allocated cache's handles.
typedef std::unique_ptr<PendingHandle, PendingHandleDeleter> UniquePendingHandle;
// There are times when callers may want to perform a Lookup() without having it
// impact the priorities of the cache elements. For example, if the caller is
// doing a Lookup() as an internal part of its implementation that does not
// correspond to actual user activity (or duplicates user activity).
//
// When LookupBehavior is NORMAL (the default), the cache policy will update the
// priorities of entries. When LookupBehavior is NO_UPDATE, the cache policy will
// not update the priority of any entry.
enum LookupBehavior {
NORMAL,
NO_UPDATE
};
// If the cache has no mapping for "key", returns NULL.
//
// Else return a handle that corresponds to the mapping.
//
// Handles are not intended to be held for significant periods of time. Also,
// threads should avoid getting multiple handles for the same key simultaneously.
//
// Sample usage:
//
// unique_ptr<Cache> cache(NewCache(...));
// ...
// {
// Cache::UniqueHandle h(cache->Lookup(...)));
// ...
// } // 'h' is automatically released here
//
// Or:
//
// unique_ptr<Cache> cache(NewCache(...));
// ...
// {
// auto h(cache->Lookup(...)));
// ...
// } // 'h' is automatically released here
//
virtual UniqueHandle Lookup(const Slice& key, LookupBehavior behavior = NORMAL) = 0;
// Return the charge encapsulate in a raw handle returned by a successful
// Lookup().
virtual size_t Charge(const UniqueHandle& handle) = 0;
// Update the charge of a handle after insertion.
virtual void UpdateCharge(const UniqueHandle& handle, size_t charge) = 0;
// If the cache contains entry for key, erase it. Note that the
// underlying entry will be kept around until all existing handles
// to it have been released.
virtual void Erase(const Slice& key) = 0;
// Return the key encapsulated in a raw handle returned by a successful
// Lookup().
virtual Slice Key(const UniqueHandle& handle) const = 0;
// Return the value encapsulated in a raw handle returned by a successful
// Lookup().
virtual Slice Value(const UniqueHandle& handle) const = 0;
// ------------------------------------------------------------
// Insertion path
// ------------------------------------------------------------
//
// Because some cache implementations (eg NVM) manage their own memory, and because we'd
// like to read blocks directly into cache-managed memory rather than causing an extra
// memcpy, the insertion of a new element into the cache requires two phases. First, a
// PendingHandle is allocated with space for the value, and then it is later inserted.
//
// For example:
//
// auto ph(cache_->Allocate("my entry", value_size, charge));
// if (!ReadDataFromDisk(cache_->MutableValue(&ph)).ok()) {
// ... error handling ...
// return;
// }
// UniqueHandle h(cache_->Insert(std::move(ph), my_eviction_callback));
// ...
// // 'h' is automatically released.
// Indicates that the charge of an item in the cache should be calculated
// based on its memory consumption.
static constexpr size_t kAutomaticCharge = std::numeric_limits<size_t>::max();
// Returns the maximum charge that will be accepted by this cache.
// Can be used to avoid generating data that we won't be able to insert.
virtual size_t MaxCharge() const = 0;
// Allocate space for a new entry to be inserted into the cache.
//
// The provided 'key' is copied into the resulting handle object.
// The allocated handle has enough space such that the value can
// be written into cache_->MutableValue(&handle).
//
// If 'charge' is not 'kAutomaticCharge', then the cache capacity will be charged
// the explicit amount. This is useful when caching items that are small but need to
// maintain a bounded count (eg file descriptors) rather than caring about their actual
// memory usage. It is also useful when caching items for whom calculating
// memory usage is a complex affair (i.e. items containing pointers to
// additional heap allocations).
//
// Note that this does not mutate the cache itself: lookups will
// not be able to find the provided key until it is inserted.
//
// It is possible that this will return a nullptr wrapped in a std::unique_ptr
// if the cache is above its capacity and eviction fails to free up enough
// space for the requested allocation.
//
// The returned handle owns the allocated memory.
virtual UniquePendingHandle Allocate(Slice key, int val_len, size_t charge) = 0;
// Default 'charge' should be kAutomaticCharge
// (default arguments on virtual functions are prohibited).
UniquePendingHandle Allocate(Slice key, int val_len) {
return Allocate(key, val_len, kAutomaticCharge);
}
// Get the mutable space created by Allocate.
virtual uint8_t* MutableValue(UniquePendingHandle* handle) const = 0;
// Commit a prepared entry into the cache.
//
// The 'pending' entry passed here should have been allocated using
// Cache::Allocate() above.
//
// This method is not guaranteed to succeed. If it succeeds, it returns a handle
// that corresponds to the mapping. If it fails, it returns a null handle.
//
// Handles are not intended to be held for significant periods of time. Also,
// threads should avoid getting multiple handles for the same key simultaneously.
//
// If 'eviction_callback' is non-NULL, then it will be called when the
// entry is later evicted or when the cache shuts down. If the Insert()
// fails, the entry is immediately destroyed and the eviction callback is called
// before the return of Insert().
virtual UniqueHandle Insert(UniquePendingHandle pending,
EvictionCallback* eviction_callback) = 0;
// Forward declaration to simplify the layout of types/typedefs needed for the
// Invalidate() method while trying to adhere to the code style guide.
struct InvalidationControl;
// Invalidate cache's entries, effectively evicting non-valid ones from the
// cache. The invalidation process iterates over the cache's recency list(s),
// from best candidate for eviction to the worst.
//
// The provided control structure 'ctl' is responsible for the following:
// * determine whether an entry is valid or not
// * determine how to iterate over the entries in the cache's recency list
//
// NOTE: The invalidation process might hold a lock while iterating over
// the cache's entries. Using proper IterationFunc might help to reduce
// contention with the concurrent request for the cache's contents.
// See the in-line documentation for IterationFunc for more details.
virtual size_t Invalidate(const InvalidationControl& ctl) = 0;
// Walk through all valid entries in the cache and push their handles into the return
// vector.
virtual std::vector<UniqueHandle> Dump() = 0;
// Functor to define a criterion on a cache entry's validity. Upon call
// of Cache::Invalidate() method, if the functor returns 'false' for the
// specified key and value, the cache evicts the entry, otherwise the entry
// stays in the cache.
typedef std::function<bool(Slice /* key */,
Slice /* value */)>
ValidityFunc;
// Functor to define whether to continue or stop iterating over the cache's
// entries based on the number of encountered invalid and valid entries
// during the Cache::Invalidate() call. If a cache contains multiple
// sub-caches (e.g., shards), those parameters are per sub-cache. For example,
// in case of multi-shard cache, when the 'iteration_func' returns 'false',
// the invalidation at current shard stops and switches to the next
// non-yet-processed shard, if any is present.
//
// The choice of the signature for the iteration functor is to allow for
// effective purging of non-valid (e.g., expired) entries in caches with
// the FIFO eviction policy (e.g., TTL caches).
//
// The first parameter of the functor is useful for short-circuiting
// the invalidation process once some valid entries have been encountered.
// For example, that's useful in case if the recency list has its entries
// ordered in FIFO-like order (e.g., TTL cache with FIFO eviction policy),
// so most-likely-invalid entries are in the very beginning of the list.
// In the latter case, once a valid (e.g., not yet expired) entry is
// encountered, there is no need to iterate any further: all the entries past
// the first valid one in the recency list should be valid as well.
//
// The second parameter is useful when the validity criterion is fuzzy,
// but there is a target number of entries to invalidate during each
// invocation of the Invalidate() method or there is some logic that reads
// the cache's metric(s) once the given number of entries have been evicted:
// e.g., compare the result memory footprint of the cache against a threshold
// to decide whether to continue invalidation of entries.
//
// Summing both parameters of the functor is useful when it's necessary to
// limit the number of entries processed per one invocation of the
// Invalidate() method. It makes sense in cases when a 'lazy' invalidation
// process is run by a periodic task along with a significant amount of
// concurrent requests to the cache, and the number of entries in the cache
// is huge. Given the fact that in most cases it's necessary to guard
// the access to the cache's recency list while iterating over it entries,
// limiting the number of entries to process at once allows for better control
// over the duration of the guarded/locked sections.
typedef std::function<bool(size_t /* valid_entries_num */,
size_t /* invalid_entries_num */)>
IterationFunc;
// A helper function for 'validity_func' of the Invalidate() method:
// invalidate all entries.
static const ValidityFunc kInvalidateAllEntriesFunc;
// A helper function for 'iteration_func' of the Invalidate() method:
// examine all entries.
static const IterationFunc kIterateOverAllEntriesFunc;
// Control structure for the Invalidate() method. Combines the validity
// and the iteration functors.
struct InvalidationControl {
// NOLINTNEXTLINE(google-explicit-constructor)
InvalidationControl(ValidityFunc vfunctor = kInvalidateAllEntriesFunc,
IterationFunc ifunctor = kIterateOverAllEntriesFunc)
: validity_func(std::move(vfunctor)),
iteration_func(std::move(ifunctor)) {
}
const ValidityFunc validity_func;
const IterationFunc iteration_func;
};
protected:
// Release a mapping returned by a previous Lookup(), using raw handle.
// REQUIRES: handle must not have been released yet.
// REQUIRES: handle must have been returned by a method on *this.
virtual void Release(Handle* handle) = 0;
// Free 'ptr', which must have been previously allocated using 'Allocate'.
virtual void Free(PendingHandle* ptr) = 0;
private:
DISALLOW_COPY_AND_ASSIGN(Cache);
};
// Instantiate a cache of a particular 'policy' flavor with the specified 'capacity'
// and identifier 'id'.
Cache* NewCache(Cache::EvictionPolicy policy, size_t capacity, const std::string& id);
} // namespace impala