blob: e4f05083bd7cabea30dbfb495ca084a45bb28043 [file] [log] [blame]
// This file is derived from cache.cc in the LevelDB project:
//
// Some portions copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// ------------------------------------------------------------
// This file implements a cache based on the MEMKIND library (http://memkind.github.io/memkind/)
// This library makes it easy to program against persistent memory hardware by exposing an API
// which parallels malloc/free, but allocates from persistent memory instead of DRAM.
//
// We use this API to implement a cache which treats persistent memory or
// non-volatile memory as if it were a larger cheaper bank of volatile memory. We
// currently make no use of its persistence properties.
//
// Currently, we only store key/value in NVM. All other data structures such as the
// ShardedLRUCache instances, hash table, etc are in DRAM. The assumption is that
// the ratio of data stored vs overhead is quite high.
#include "kudu/util/nvm_cache.h"
#include <dlfcn.h>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "kudu/gutil/atomic_refcount.h"
#include "kudu/gutil/atomicops.h"
#include "kudu/gutil/bits.h"
#include "kudu/gutil/dynamic_annotations.h"
#include "kudu/gutil/hash/city.h"
#include "kudu/gutil/macros.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/gutil/sysinfo.h"
#include "kudu/util/cache.h"
#include "kudu/util/cache_metrics.h"
#include "kudu/util/flag_tags.h"
#include "kudu/util/locks.h"
#include "kudu/util/metrics.h"
#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"
#ifndef MEMKIND_PMEM_MIN_SIZE
#define MEMKIND_PMEM_MIN_SIZE (1024 * 1024 * 16) // Taken from memkind 1.9.0.
#endif
struct memkind;
// Useful in tests that require accurate cache capacity accounting.
DECLARE_bool(cache_force_single_shard);
DEFINE_string(nvm_cache_path, "/pmem",
"The path at which the NVM cache will try to allocate its memory. "
"This can be a tmpfs or ramfs for testing purposes.");
DEFINE_bool(nvm_cache_simulate_allocation_failure, false,
"If true, the NVM cache will inject failures in calls to memkind_malloc "
"for testing.");
TAG_FLAG(nvm_cache_simulate_allocation_failure, unsafe);
DEFINE_double(nvm_cache_usage_ratio, 1.25,
"A ratio to set the usage of nvm cache. The charge of an item in the nvm "
"cache is equal to the results of memkind_malloc_usable_size multiplied by "
"the ratio.");
TAG_FLAG(nvm_cache_usage_ratio, advanced);
using std::string;
using std::unique_ptr;
using std::vector;
using strings::Substitute;
static bool ValidateNVMCacheUsageRatio(const char* flagname, double value) {
if (value < 1.0) {
LOG(ERROR) << Substitute("$0 must be greater than or equal to 1.0, value $1 is invalid.",
flagname, value);
return false;
}
if (value < 1.25) {
LOG(WARNING) << Substitute("The value of $0 is $1, it is less than recommended "
"value (1.25). Due to memkind fragmentation, an improper "
"ratio will cause allocation failures while capacity-based "
"evictions are not triggered. Raise --nvm_cache_usage_ratio.",
flagname, value);
}
return true;
}
DEFINE_validator(nvm_cache_usage_ratio, &ValidateNVMCacheUsageRatio);
namespace kudu {
namespace {
// Taken together, these typedefs and this macro make it easy to call a
// memkind function:
//
// CALL_MEMKIND(memkind_malloc, vmp_, size);
typedef int (*memkind_create_pmem)(const char*, size_t, memkind**);
typedef int (*memkind_destroy_kind)(memkind*);
typedef void* (*memkind_malloc)(memkind*, size_t);
typedef size_t (*memkind_malloc_usable_size)(memkind*, void*);
typedef void (*memkind_free)(memkind*, void*);
#define CALL_MEMKIND(func_name, ...) ((func_name)g_##func_name)(__VA_ARGS__)
// Function pointers into memkind; set by InitMemkindOps().
void* g_memkind_create_pmem;
void* g_memkind_destroy_kind;
void* g_memkind_malloc;
void* g_memkind_malloc_usable_size;
void* g_memkind_free;
// After InitMemkindOps() is called, true if memkind is available and safe
// to use, false otherwise.
bool g_memkind_available;
std::once_flag g_memkind_ops_flag;
// Try to dlsym() a particular symbol from 'handle', storing the result in 'ptr'
// if successful.
Status TryDlsym(void* handle, const char* sym, void** ptr) {
dlerror(); // Need to clear any existing error first.
void* ret = dlsym(handle, sym);
char* error = dlerror();
if (error) {
return Status::NotSupported(Substitute("could not dlsym $0", sym), error);
}
*ptr = ret;
return Status::OK();
}
// Try to dlopen() memkind and set up all the function pointers we need from it.
//
// Note: in terms of protecting ourselves against changes in memkind, we'll
// notice (and fail) if a symbol is missing, but not if it's signature has
// changed or if there's some subtle behavioral change. A scan of the memkind
// repo suggests that backwards compatibility is enforced: symbols are only
// added and behavioral changes are effected via the introduction of new symbols.
void InitMemkindOps() {
g_memkind_available = false;
// Use RTLD_NOW so that if any of memkind's dependencies aren't satisfied
// (e.g. libnuma is too old and is missing symbols), we'll know up front
// instead of during cache operations.
void* memkind_lib = dlopen("libmemkind.so.0", RTLD_NOW);
if (!memkind_lib) {
LOG(WARNING) << "could not dlopen: " << dlerror();
return;
}
auto cleanup = MakeScopedCleanup([&]() {
dlclose(memkind_lib);
});
#define DLSYM_OR_RETURN(func_name, handle) do { \
const Status _s = TryDlsym(memkind_lib, func_name, handle); \
if (!_s.ok()) { \
LOG(WARNING) << _s.ToString(); \
return; \
} \
} while (0)
DLSYM_OR_RETURN("memkind_create_pmem", &g_memkind_create_pmem);
DLSYM_OR_RETURN("memkind_destroy_kind", &g_memkind_destroy_kind);
DLSYM_OR_RETURN("memkind_malloc", &g_memkind_malloc);
DLSYM_OR_RETURN("memkind_malloc_usable_size", &g_memkind_malloc_usable_size);
DLSYM_OR_RETURN("memkind_free", &g_memkind_free);
#undef DLSYM_OR_RETURN
g_memkind_available = true;
// Need to keep the memkind library handle open so our function pointers
// remain loaded in memory.
cleanup.cancel();
}
typedef simple_spinlock MutexType;
// LRU cache implementation
// An entry is a variable length heap-allocated structure. Entries
// are kept in a circular doubly linked list ordered by access time.
struct LRUHandle {
Cache::EvictionCallback* eviction_callback;
LRUHandle* next_hash;
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
uint32_t key_length;
uint32_t val_length;
Atomic32 refs;
uint32_t hash; // Hash of key(); used for fast sharding and comparisons
uint8_t* kv_data;
Slice key() const {
return Slice(kv_data, key_length);
}
Slice value() const {
return Slice(&kv_data[key_length], val_length);
}
uint8_t* val_ptr() {
return &kv_data[key_length];
}
};
// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
class HandleTable {
public:
HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
~HandleTable() { delete[] list_; }
LRUHandle* Lookup(const Slice& key, uint32_t hash) {
return *FindPointer(key, hash);
}
LRUHandle* Insert(LRUHandle* h) {
LRUHandle** ptr = FindPointer(h->key(), h->hash);
LRUHandle* old = *ptr;
h->next_hash = (old == nullptr ? nullptr : old->next_hash);
*ptr = h;
if (old == nullptr) {
++elems_;
if (elems_ > length_) {
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
Resize();
}
}
return old;
}
LRUHandle* Remove(const Slice& key, uint32_t hash) {
LRUHandle** ptr = FindPointer(key, hash);
LRUHandle* result = *ptr;
if (result != nullptr) {
*ptr = result->next_hash;
--elems_;
}
return result;
}
private:
// The table consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
uint32_t length_;
uint32_t elems_;
LRUHandle** list_;
// Return a pointer to slot that points to a cache entry that
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash & (length_ - 1)];
while (*ptr != nullptr &&
((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
return ptr;
}
void Resize() {
uint32_t new_length = 16;
while (new_length < elems_ * 1.5) {
new_length *= 2;
}
LRUHandle** new_list = new LRUHandle*[new_length];
memset(new_list, 0, sizeof(new_list[0]) * new_length);
uint32_t count = 0;
for (uint32_t i = 0; i < length_; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash & (new_length - 1)];
h->next_hash = *ptr;
*ptr = h;
h = next;
count++;
}
}
DCHECK_EQ(elems_, count);
delete[] list_;
list_ = new_list;
length_ = new_length;
}
};
// A single shard of sharded cache.
class NvmLRUCache {
public:
explicit NvmLRUCache(memkind *vmp);
~NvmLRUCache();
// Separate from constructor so caller can easily make an array of LRUCache
void SetCapacity(size_t capacity) { capacity_ = capacity; }
void SetMetrics(CacheMetrics* metrics) { metrics_ = metrics; }
Cache::Handle* Insert(LRUHandle* e, Cache::EvictionCallback* eviction_callback);
// Like Cache::Lookup, but with an extra "hash" parameter.
Cache::Handle* Lookup(const Slice& key, uint32_t hash, bool caching);
void Release(Cache::Handle* handle);
void Erase(const Slice& key, uint32_t hash);
size_t Invalidate(const Cache::InvalidationControl& ctl);
void* Allocate(size_t size);
private:
void NvmLRU_Remove(LRUHandle* e);
void NvmLRU_Append(LRUHandle* e);
// Just reduce the reference count by 1.
// Return true if last reference
bool Unref(LRUHandle* e);
void FreeEntry(LRUHandle* e);
// Evict the LRU item in the cache, adding it to the linked list
// pointed to by 'to_remove_head'.
void EvictOldestUnlocked(LRUHandle** to_remove_head);
// Free all of the entries in the linked list that has to_free_head
// as its head.
void FreeLRUEntries(LRUHandle* to_free_head);
// Wrapper around memkind_malloc which injects failures based on a flag.
void* MemkindMalloc(size_t size);
// Initialized before use.
size_t capacity_;
// mutex_ protects the following state.
MutexType mutex_;
size_t usage_;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
LRUHandle lru_;
HandleTable table_;
memkind* vmp_;
CacheMetrics* metrics_;
};
NvmLRUCache::NvmLRUCache(memkind* vmp)
: usage_(0),
vmp_(vmp),
metrics_(nullptr) {
// Make empty circular linked list
lru_.next = &lru_;
lru_.prev = &lru_;
}
NvmLRUCache::~NvmLRUCache() {
for (LRUHandle* e = lru_.next; e != &lru_; ) {
LRUHandle* next = e->next;
DCHECK_EQ(e->refs, 1); // Error if caller has an unreleased handle
if (Unref(e)) {
FreeEntry(e);
}
e = next;
}
}
void* NvmLRUCache::MemkindMalloc(size_t size) {
if (PREDICT_FALSE(FLAGS_nvm_cache_simulate_allocation_failure)) {
return nullptr;
}
return CALL_MEMKIND(memkind_malloc, vmp_, size);
}
bool NvmLRUCache::Unref(LRUHandle* e) {
DCHECK_GT(ANNOTATE_UNPROTECTED_READ(e->refs), 0);
return !base::RefCountDec(&e->refs);
}
void NvmLRUCache::FreeEntry(LRUHandle* e) {
DCHECK_EQ(ANNOTATE_UNPROTECTED_READ(e->refs), 0);
if (e->eviction_callback) {
e->eviction_callback->EvictedEntry(e->key(), e->value());
}
if (PREDICT_TRUE(metrics_)) {
metrics_->cache_usage->DecrementBy(e->charge);
metrics_->evictions->Increment();
}
CALL_MEMKIND(memkind_free, vmp_, e);
}
// Allocate NVM memory.
void* NvmLRUCache::Allocate(size_t size) {
return MemkindMalloc(size);
}
void NvmLRUCache::NvmLRU_Remove(LRUHandle* e) {
e->next->prev = e->prev;
e->prev->next = e->next;
usage_ -= e->charge;
}
void NvmLRUCache::NvmLRU_Append(LRUHandle* e) {
// Make "e" newest entry by inserting just before lru_
e->next = &lru_;
e->prev = lru_.prev;
e->prev->next = e;
e->next->prev = e;
usage_ += e->charge;
}
Cache::Handle* NvmLRUCache::Lookup(const Slice& key, uint32_t hash, bool caching) {
LRUHandle* e;
{
std::lock_guard<MutexType> l(mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
// If an entry exists, remove the old entry from the cache
// and re-add to the end of the linked list.
base::RefCountInc(&e->refs);
NvmLRU_Remove(e);
NvmLRU_Append(e);
}
}
// Do the metrics outside of the lock.
if (metrics_) {
metrics_->lookups->Increment();
bool was_hit = (e != nullptr);
if (was_hit) {
if (caching) {
metrics_->cache_hits_caching->Increment();
} else {
metrics_->cache_hits->Increment();
}
} else {
if (caching) {
metrics_->cache_misses_caching->Increment();
} else {
metrics_->cache_misses->Increment();
}
}
}
return reinterpret_cast<Cache::Handle*>(e);
}
void NvmLRUCache::Release(Cache::Handle* handle) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = Unref(e);
if (last_reference) {
FreeEntry(e);
}
}
void NvmLRUCache::EvictOldestUnlocked(LRUHandle** to_remove_head) {
LRUHandle* old = lru_.next;
NvmLRU_Remove(old);
table_.Remove(old->key(), old->hash);
if (Unref(old)) {
old->next = *to_remove_head;
*to_remove_head = old;
}
}
void NvmLRUCache::FreeLRUEntries(LRUHandle* to_free_head) {
while (to_free_head != nullptr) {
LRUHandle* next = to_free_head->next;
FreeEntry(to_free_head);
to_free_head = next;
}
}
Cache::Handle* NvmLRUCache::Insert(LRUHandle* e,
Cache::EvictionCallback* eviction_callback) {
DCHECK(e);
LRUHandle* to_remove_head = nullptr;
e->refs = 2; // One from LRUCache, one for the returned handle
e->eviction_callback = eviction_callback;
if (PREDICT_TRUE(metrics_)) {
metrics_->cache_usage->IncrementBy(e->charge);
metrics_->inserts->Increment();
}
{
std::lock_guard<MutexType> l(mutex_);
NvmLRU_Append(e);
LRUHandle* old = table_.Insert(e);
if (old != nullptr) {
NvmLRU_Remove(old);
if (Unref(old)) {
old->next = to_remove_head;
to_remove_head = old;
}
}
while (usage_ > capacity_ && lru_.next != &lru_) {
EvictOldestUnlocked(&to_remove_head);
}
}
// we free the entries here outside of mutex for
// performance reasons
FreeLRUEntries(to_remove_head);
return reinterpret_cast<Cache::Handle*>(e);
}
void NvmLRUCache::Erase(const Slice& key, uint32_t hash) {
LRUHandle* e;
bool last_reference = false;
{
std::lock_guard<MutexType> l(mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
NvmLRU_Remove(e);
last_reference = Unref(e);
}
}
// mutex not held here
// last_reference will only be true if e != nullptr
if (last_reference) {
FreeEntry(e);
}
}
size_t NvmLRUCache::Invalidate(const Cache::InvalidationControl& ctl) {
size_t invalid_entry_count = 0;
size_t valid_entry_count = 0;
LRUHandle* to_remove_head = nullptr;
{
std::lock_guard<MutexType> l(mutex_);
// rl_.next is the oldest entry in the recency list.
LRUHandle* h = lru_.next;
while (h != nullptr && h != &lru_ &&
ctl.iteration_func(valid_entry_count, invalid_entry_count)) {
if (ctl.validity_func(h->key(), h->value())) {
// Continue iterating over the list.
h = h->next;
++valid_entry_count;
continue;
}
// Copy the handle slated for removal.
LRUHandle* h_to_remove = h;
// Prepare for next iteration of the cycle.
h = h->next;
NvmLRU_Remove(h_to_remove);
table_.Remove(h_to_remove->key(), h_to_remove->hash);
if (Unref(h_to_remove)) {
h_to_remove->next = to_remove_head;
to_remove_head = h_to_remove;
}
++invalid_entry_count;
}
}
FreeLRUEntries(to_remove_head);
return invalid_entry_count;
}
// Determine the number of bits of the hash that should be used to determine
// the cache shard. This, in turn, determines the number of shards.
int DetermineShardBits() {
int bits = PREDICT_FALSE(FLAGS_cache_force_single_shard) ?
0 : Bits::Log2Ceiling(base::NumCPUs());
VLOG(1) << "Will use " << (1 << bits) << " shards for LRU cache.";
return bits;
}
class ShardedLRUCache : public Cache {
private:
unique_ptr<CacheMetrics> metrics_;
vector<unique_ptr<NvmLRUCache>> shards_;
// Number of bits of hash used to determine the shard.
const int shard_bits_;
memkind* vmp_;
static inline uint32_t HashSlice(const Slice& s) {
return util_hash::CityHash64(
reinterpret_cast<const char *>(s.data()), s.size());
}
uint32_t Shard(uint32_t hash) {
// Widen to uint64 before shifting, or else on a single CPU,
// we would try to shift a uint32_t by 32 bits, which is undefined.
return static_cast<uint64_t>(hash) >> (32 - shard_bits_);
}
public:
explicit ShardedLRUCache(size_t capacity, const string& /*id*/, memkind* vmp)
: shard_bits_(DetermineShardBits()),
vmp_(vmp) {
int num_shards = 1 << shard_bits_;
const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
for (int s = 0; s < num_shards; s++) {
unique_ptr<NvmLRUCache> shard(new NvmLRUCache(vmp_));
shard->SetCapacity(per_shard);
shards_.emplace_back(std::move(shard));
}
}
virtual ~ShardedLRUCache() {
shards_.clear();
// Per the note at the top of this file, our cache is entirely volatile.
// Hence, when the cache is destructed, we delete the underlying
// memkind pool.
CALL_MEMKIND(memkind_destroy_kind, vmp_);
}
virtual UniqueHandle Insert(UniquePendingHandle handle,
Cache::EvictionCallback* eviction_callback) OVERRIDE {
LRUHandle* h = reinterpret_cast<LRUHandle*>(DCHECK_NOTNULL(handle.release()));
return UniqueHandle(
shards_[Shard(h->hash)]->Insert(h, eviction_callback),
Cache::HandleDeleter(this));
}
virtual UniqueHandle Lookup(const Slice& key, CacheBehavior caching) OVERRIDE {
const uint32_t hash = HashSlice(key);
return UniqueHandle(
shards_[Shard(hash)]->Lookup(key, hash, caching == EXPECT_IN_CACHE),
Cache::HandleDeleter(this));
}
virtual void Release(Handle* handle) OVERRIDE {
LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
shards_[Shard(h->hash)]->Release(handle);
}
virtual void Erase(const Slice& key) OVERRIDE {
const uint32_t hash = HashSlice(key);
shards_[Shard(hash)]->Erase(key, hash);
}
virtual Slice Value(const UniqueHandle& handle) const OVERRIDE {
return reinterpret_cast<const LRUHandle*>(handle.get())->value();
}
virtual uint8_t* MutableValue(UniquePendingHandle* handle) OVERRIDE {
return reinterpret_cast<LRUHandle*>(handle->get())->val_ptr();
}
virtual void SetMetrics(unique_ptr<CacheMetrics> metrics) OVERRIDE {
metrics_ = std::move(metrics);
for (const auto& shard : shards_) {
shard->SetMetrics(metrics_.get());
}
}
virtual UniquePendingHandle Allocate(Slice key, int val_len, int charge) OVERRIDE {
int key_len = key.size();
DCHECK_GE(key_len, 0);
DCHECK_GE(val_len, 0);
// Try allocating from each of the shards -- if memkind is tight,
// this can cause eviction, so we might have better luck in different
// shards.
for (const auto& shard : shards_) {
UniquePendingHandle ph(static_cast<PendingHandle*>(
shard->Allocate(sizeof(LRUHandle) + key_len + val_len)),
Cache::PendingHandleDeleter(this));
if (ph) {
LRUHandle* handle = reinterpret_cast<LRUHandle*>(ph.get());
uint8_t* buf = reinterpret_cast<uint8_t*>(ph.get());
handle->kv_data = &buf[sizeof(LRUHandle)];
handle->val_length = val_len;
handle->key_length = key_len;
// Multiply the results of memkind_malloc_usable_size by a ratio
// due to the fragmentation is not counted in.
handle->charge = (charge == kAutomaticCharge) ?
CALL_MEMKIND(memkind_malloc_usable_size, vmp_, buf) * FLAGS_nvm_cache_usage_ratio :
charge;
handle->hash = HashSlice(key);
memcpy(handle->kv_data, key.data(), key.size());
return ph;
}
}
// TODO(unknown): increment a metric here on allocation failure.
return UniquePendingHandle(nullptr, Cache::PendingHandleDeleter(this));
}
virtual void Free(PendingHandle* ph) OVERRIDE {
CALL_MEMKIND(memkind_free, vmp_, ph);
}
size_t Invalidate(const InvalidationControl& ctl) override {
size_t invalidated_count = 0;
for (const auto& shard: shards_) {
invalidated_count += shard->Invalidate(ctl);
}
return invalidated_count;
}
};
} // end anonymous namespace
template<>
Cache* NewCache<Cache::EvictionPolicy::LRU,
Cache::MemoryType::NVM>(size_t capacity, const std::string& id) {
std::call_once(g_memkind_ops_flag, InitMemkindOps);
// TODO(adar): we should plumb the failure up the call stack, but at the time
// of writing the NVM cache is only usable by the block cache, and its use of
// the singleton pattern prevents the surfacing of errors.
CHECK(g_memkind_available) << "Memkind not available!";
// memkind_create_pmem() will fail if the capacity is too small, but with
// an inscrutable error. So, we'll check ourselves.
CHECK_GE(capacity, MEMKIND_PMEM_MIN_SIZE)
<< "configured capacity " << capacity << " bytes is less than "
<< "the minimum capacity for an NVM cache: " << MEMKIND_PMEM_MIN_SIZE;
LOG(INFO) << 1 / FLAGS_nvm_cache_usage_ratio * 100 << "% capacity "
<< "from nvm block cache is available due to memkind fragmentation.";
memkind* vmp;
int err = CALL_MEMKIND(memkind_create_pmem, FLAGS_nvm_cache_path.c_str(), capacity, &vmp);
// If we cannot create the cache pool we should not retry.
PLOG_IF(FATAL, err) << "Could not initialize NVM cache library in path "
<< FLAGS_nvm_cache_path.c_str();
return new ShardedLRUCache(capacity, id, vmp);
}
bool CanUseNVMCacheForTests() {
std::call_once(g_memkind_ops_flag, InitMemkindOps);
return g_memkind_available;
}
} // namespace kudu