blob: f7048956f9b226332af4c1d3e2c19dc8161433cd [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <list>
#include <memory>
#include <mutex>
#include <tuple>
#include <unordered_map>
#include <boost/intrusive/list.hpp>
#include "gutil/macros.h"
#include "util/spinlock.h"
namespace impala {
/// LruMultiCache is a threadsafe Least Recently Used Cache built on std::unordered_map
/// and boost::intrusive::list
///
/// Features
/// - can store multiple objects with the same key
/// - provides unique access to the freshest available stored object
/// - soft capacity to limit stored objects
/// - EvictOlderThan() function to age out unused objects
/// - O(1) operations (except EvictOlderThan(), which can trigger multiple evictions)
/// - automatic release of objects with RAII accessor
/// - explicit release/destroy through accessor
/// - prevents aging by removing empty owning lists
///
/// Limitations
/// - User has to guarantee the cache outlives the accessors
/// - The cache can't force the capacity limit, it can overshoot in case all the objects
/// are in use and we emplace more than the capacity, releases will evict the objects in
/// this case
/// - A bit heavy weight for simple usage (e.g. no thread safety, no unique access, no
/// multi storage, no timestamp based eviction), it was designed to store
/// HdfsFileHandles
/// - Only emplace of objects are supported and it locks the cache during construction.
/// - Inefficient memory cache usage
///
/// Design
/// _ _ _ _ _
/// |_|_|_|_|_| unordered_map buckets
/// | |
/// _ _| _ _|_
/// |0|1| |0|0|1| owning lists, 0 is available, 1 is in use
/// \ / /
/// (LRU calculations)
/// |
/// _ _ V _
/// |0|->|0|->|0| LRU list
///
///
///
/// Cache
/// - std::unordered_map stores std::list of the objects extended with internal
/// information, these lists are the owners
/// - Owning lists are maintaining LRU order only for available objects (order does
/// not matter for currently used objects) to support O(1) get, the least recently
/// used, currently available element is at the front of the list, if there is one
/// - boost::intrusive::list maintains an LRU list of the currently available objects,
/// this is an intrusive (as known as not owning) list, used for eviction
///
/// Accessor
/// - RAII object which provides unique access to and object and releases
/// automatically
/// - Support explicit release and destroy too
///
template <typename KeyType, typename ValueType>
class LruMultiCache {
public:
/// Definition is at the bottom, because it uses type defitions from private scope
class Accessor;
explicit LruMultiCache(size_t capacity = 0);
/// Making sure the cache stays in place, reference of it is used for release/destroy
LruMultiCache(LruMultiCache&&) = delete;
LruMultiCache& operator=(LruMultiCache&&) = delete;
DISALLOW_COPY_AND_ASSIGN(LruMultiCache);
/// Returns the number of stored objects in O(1) time
size_t Size();
/// Returns the number of owned lists in O(1) time
size_t NumberOfKeys();
void SetCapacity(size_t new_capacity);
/// Returns a unique accessor to the freshest available objects
[[nodiscard]] Accessor Get(const KeyType& key);
/// Emplace a new object and return a unique accessor to it.
/// Variadic template is used to forward the rest of the arguments to the stored
/// object's constructor
template <typename... Args>
[[nodiscard]] Accessor EmplaceAndGet(const KeyType& key, Args&&... args);
/// Evicts available objects with too old release time
void EvictOlderThan(uint64_t oldest_allowed_timestamp);
/// Number of available objects, O(n) complexity, for testing purposes
size_t NumberOfAvailableObjects();
/// Force rehash by increase bucket count, for testing purposes
void Rehash();
private:
/// Doubly linked list and auto_unlink is used for O(1) remove from LRU list, in case of
/// get and evict.
typedef boost::intrusive::list_member_hook<
boost::intrusive::link_mode<boost::intrusive::auto_unlink>>
link_type;
/// Internal type storing everything needed for O(1) operations
struct ValueType_internal {
typedef std::list<ValueType_internal> Container_internal;
/// Variadic template is used to support emplace
template <typename... Args>
explicit ValueType_internal(LruMultiCache& cache, const KeyType& key,
Container_internal& container, Args&&... args);
bool IsAvailable();
/// Member hook for LRU list
link_type member_hook;
/// std::unordered_map::iterators are invalidated during rehash, but it has stable
/// references
/// LruMultiCache reference is used during release/destroy.
LruMultiCache& cache;
/// Key is used to remove empty owning list in case of eviction or destruction of the
/// last element
const KeyType& key;
/// Container reference and iterator is used during release/destroy.
/// Container reference could be spared by using key, but this is faster, spares a
/// hash call
Container_internal& container;
typename Container_internal::iterator it;
/// This is the object the user wants to cache
ValueType value;
/// Timestamp of the last release of the object, used only by EvictOlderThan()
uint64_t timestamp_seconds;
};
/// Owning list typedef
typedef std::list<ValueType_internal> Container;
/// Hash table typedef
typedef std::unordered_map<KeyType, Container> HashTableType;
typedef boost::intrusive::member_hook<ValueType_internal, link_type,
&ValueType_internal::member_hook>
MemberHookOption;
/// No constant time size to support self unlink, cache size is tracked by the class
typedef boost::intrusive::list<ValueType_internal, MemberHookOption,
boost::intrusive::constant_time_size<false>>
LruListType;
void Release(ValueType_internal* p_value_internal);
void Destroy(ValueType_internal* p_value_internal);
/// Used by public functions, if an object became available but the cache if over
/// capacity, it removes it
void EvictOneIfNeeded();
/// Used by EvictOlderThan()
void EvictOne(ValueType_internal& value_internal);
HashTableType hash_table_;
LruListType lru_list_;
size_t capacity_;
size_t size_;
/// Protects access to cache. No need for read/write cache as there is no costly
/// pure read operation
SpinLock lock_;
public:
/// RAII Accessor to give unqiue access for a cached object
class Accessor {
public:
/// Default construction is used to support usage as in/out parameter
Accessor(ValueType_internal* p_value_internal = nullptr);
/// Similar interface to unique_ptr, only movable
Accessor(Accessor&&);
Accessor& operator=(Accessor&&);
DISALLOW_COPY_AND_ASSIGN(Accessor);
/// Automatic release in destructor
~Accessor();
/// Returns a pointer to the object to the user.
/// Returns nullptr if it's an empty accessor;
ValueType* Get();
/// Returns a pointer to the stored key;
/// Returns nullptr if it's an empty accessor;
const KeyType* const GetKey() const;
/// Explicit release of the object
void Release();
/// Explicit destruction of the object
void Destroy();
private:
ValueType_internal* p_value_internal_;
};
};
} // namespace impala