/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.evictor;
import static com.sleepycat.je.EnvironmentFailureException.unexpectedState;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.ALLOC_FAILURE;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.ALLOC_OVERFLOW;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.BINS_LOADED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.BINS_STORED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.CACHED_BINS;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.CACHED_BIN_DELTAS;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.CACHED_LNS;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.CRITICAL_NODES_TARGETED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.DIRTY_NODES_EVICTED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.GROUP_DESC;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.GROUP_NAME;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.LNS_EVICTED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.LNS_LOADED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.LNS_STORED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.LRU_SIZE;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.NODES_EVICTED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.NODES_MUTATED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.NODES_SKIPPED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.NODES_STRIPPED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.NODES_TARGETED;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.THREAD_UNAVAILABLE;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.TOTAL_BLOCKS;
import static com.sleepycat.je.evictor.OffHeapStatDefinition.TOTAL_BYTES;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.Checksum;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.EnvironmentMutableConfig;
import com.sleepycat.je.StatsConfig;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbConfigManager;
import com.sleepycat.je.dbi.EnvConfigObserver;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.MemoryBudget;
import com.sleepycat.je.evictor.Evictor.EvictionSource;
import com.sleepycat.je.log.LogEntryType;
import com.sleepycat.je.log.Provisional;
import com.sleepycat.je.log.entry.BINDeltaLogEntry;
import com.sleepycat.je.log.entry.INLogEntry;
import com.sleepycat.je.tree.BIN;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.LN;
import com.sleepycat.je.utilint.Adler32;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.IntStat;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.LongStat;
import com.sleepycat.je.utilint.Pair;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.StoppableThreadFactory;
import com.sleepycat.je.utilint.VLSN;
/**
* Off-heap cache and evictor.
*
* Overview
* --------
* When an LN or BIN is evicted from the main cache it is moved off-heap. The
* off-heap evictor (this class) will apply the same LRU algorithm and
* CacheMode logic that is used by the main evictor. When an off-heap cache is
* used, the main evictor will not place dirty INs on a priority 2 LRU list,
* and will not perform LN stripping or BIN delta mutation; instead, these
* actions become the responsibility of the off-heap evictor.
*
* UINs are not stored off-heap because the complexity this would add is not
* worth the benefits. An extremely large data set can be represented by the
* UINs that fit in a 10GB main cache, so the lack of off-heap UINs is not
* considered a deficiency.
*
* Movement of LNs and BINs between the two caches is performed as follows.
* Note that LNs and BINs are not moved off-heap if they are deleted or if
* they belong to an internal database. And of course, embedded and duplicate
* DB LNs are not stored separately off-heap.
*
* When an LN or a BIN is evicted from main, it is stored off-heap.
*
* If the off-heap memory block cannot be allocated, the object is not stored
* and no exception is thrown. This prevents allocation failures from causing
* CRUD operation failures. Stats about allocation failures are maintained and
* a SEVERE message is logged when the failure is because no more system
* memory is available. See the OffHeapAllocator interface for details.
*
* For an off-heap LN with a parent BIN in main cache, the LN's memory ID is
* maintained in the BIN. The BIN is assigned an off-heap LRU index so the
* off-heap evictor can perform off-heap LN stripping. In this case, the BIN
* is in both a main and off-heap LRU list. For now at least (TODO), because
* deferred write DBs are not supported, a priority-1 off-heap LRU list is
* used, since the LNs will never be logged.
*
* An off-heap BIN is assigned an off-heap LRU index, which is stored in its
* parent UIN slot in main cache. The slot also has an "off-heap dirty" flag
* that allows the checkpointer to discover dirty off-heap BINs, and a
* "priority 2" flag that indicates whether the BIN is in the priority 1 or 2
* LRU lists.
*
* When a BIN moves off-heap and the BIN currently has off-heap LNs, the
* references (memory IDs) of the off-heap LNs are stored with the serialized
* BIN. When the off-heap evictor processes the BIN, it will free the
* off-heap LNs and modify or replace the off-heap BIN so that it no longer
* references them. This is the equivalent of the LN stripping performed by the
* main cache evictor.
*
* An off-heap BIN will be mutated to a BIN-delta using the same rules used by
* the main evictor.
*
* The use of separate priority 1 and priority 2 LRU lists also copies the
* approach used in the main evictor (when no off-heap cache is configured).
*
* - Eviction of nodes on the priority 2 lists occurs only after emptying the
* priority 1 lists.
*
* - A BIN is moved from a priority 1 list to a priority 2 list when it is
* dirty, its LNs have been stripped and BIN-delta mutation has been
* attempted.
*
* - Therefore, only dirty BINs with no resident LNs, and which have been
* mutated to BIN-deltas (if possible), appear in the priority 2 lists.
*
* However, in the off-heap cache, all off-heap BINs appear in an LRU list,
* unlike the main cache where some INs do not appear because they have
* resident children and therefore are not evictable.
*
* Nodes in both caches at once
* ----------------------------
* There are advantages and disadvantages to allowing duplication of a node
* (LN or BIN) in the off-heap and main cache. The advantage arises when a node
* is loaded from off-heap into main, and we know (because CacheMode.EVICT_LN,
* EVICT_BIN or UNCHANGED is used) that when the operation is complete the node
* will be evicted from main and stored off-heap again. In this case it is more
* efficient to leave it off-heap and tolerate the duplication for the duration
* of the operation. However, the drawbacks of doing this are:
*
* 1. We cannot assume in code that a node is in only one cache at a time. When
* it appears in both caches, we must always use the object in the main
* cache, since the off-heap object may be stale.
*
* 2. If for some reason the node is NOT evicted from the main cache, we must
* remove it from off-heap. This can happen when the node is accessed with
* a different CacheMode (by the original thread or a different thread)
* prior to completing the operation. Removal from the off-heap cache
* should be done ASAP, so the duplication does not cause unnecessary
* eviction.
*
* 3. If the node in the main cache is modified, this invalidates the node in
* the off-heap cache and we must be sure not to use the off-heap version
* and to remove it ASAP. This is very complex for BINs in particular,
* because they can be modified in so many different ways. For LNs, on the
* other hand, the types of modifications are fairly limited.
*
* Because of the complexity issue in item 3, we do not allow duplication of
* BINs. We do allow duplication of LNs, and this is handled as follows:
*
* - freeRedundantLN is called when an LN is accessed via IN.fetchLN or getLN.
* If a CacheMode is used that will not evict the LN, the LN is removed
* from off-heap.
*
* - freeLN is called (via BIN.freeOffHeapLN) during any operation that will
* modify an LN.
*
* If for some reason these mechanisms fail to prevent unwanted duplication,
* eviction will eventually remove the redundant nodes.
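*
* As a sketch only (freeRedundantLN below is the authoritative logic), the
* redundant-LN handling has roughly this shape:
*
*   switch (cacheMode) {
*   case EVICT_LN:
*   case EVICT_BIN:
*       return;                        // LN will leave main; keep off-heap copy
*   case DEFAULT:
*   case KEEP_HOT:
*       bin.setOffHeapLNId(index, 0);  // LN stays in main; drop off-heap copy
*       freeLN(memId);
*   }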
*
* LRU data structures and concurrency control
* -------------------------------------------
* LRU entries form linked lists. As in the main cache, there are two sets of
* LRU lists for priority 1 and 2 BINs, and multiple lists in each set to
* reduce thread contention on the linked lists.
*
* LRU information is allocated using arrays to minimize per-entry object
* overhead. There is a single pool of allocated entries that are used for all
* LRULists. The entries are uniquely identified by an int ID.
*
* The arrays are allocated in Chunks and a Chunk is never de-allocated. This
* is for two reasons:
*
* - Chunks can be referenced without any locking (concurrency control is
* discussed below).
*
* - Using Chunks avoids having to pre-allocate all LRU entries, while still
* minimizing Object overhead (see CHUNK_SIZE).
*
* The 'chunks' array contains all allocated Chunks. In each Chunk there is an
* array for each field in an LRU entry.
*
* LRU entries are assigned sequential int IDs starting from zero. The chunk
* for a given entry ID is:
* chunks[entry / CHUNK_SIZE]
* and the array index within the chunk is:
* entry % CHUNK_SIZE
*
* The chunks array can be read (indexed to obtain a Chunk object) without any
* locking because a copy-on-write technique is used. When a new Chunk must be
* allocated, the addRemoveEntryMutex protects the assignment of the chunks
* array. This mutex also protects the free entry list (the firstFreeListEntry
* field and the next/prev indexes of the entries on the free list). This mutex
* is global per Environment, but is not frequently locked -- only when an LRU
* entry is added or removed.
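*
* For example, a lock-free read of an entry's slots looks roughly like this
* (a sketch; getMemId and getOwner below are the real accessors):
*
*   final Chunk chunk = chunks[entry / CHUNK_SIZE]; // volatile read, no lock
*   final long memId = chunk.memIds[entry % CHUNK_SIZE];
*   final IN owner = chunk.owners[entry % CHUNK_SIZE];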
*
* The fields of an LRU entry -- the array slots -- are protected as follows.
*
* - The linked list fields -- prev and next slots -- are protected by the
* LRUList mutex, for entries in an LRUList. For entries on the free list,
* these are protected by the addRemoveEntryMutex.
*
* - Other fields -- owners and memIds slots, for example -- are protected by
* the IN latch. The IN "owns" these fields for its associated LRU entry
* (in the case of a BIN) or entries (in the case of an IN).
*
* Of course the IN latch also protects the fields in the IN related to the
* LRU entry: the BIN's lruIdx field, and the arrays of child LN memId
* (for a BIN) and child IN lruIdx (for a UIN).
*
* When multiple locks are taken, the order is:
* IN latch, LRUList mutex
* -or-
* IN latch, addRemoveEntryMutex
*
* The LRUList mutex and addRemoveEntryMutex are never both locked.
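*
* A typical LRU update therefore has this shape (a sketch only; in the code
* below the latch is normally taken by the caller, as the assertions in
* addBack and storeEvictedLN show):
*
*   bin.latchNoUpdateLRU();       // 1. IN latch
*   try {
*       moveBack(entry, false);   // 2. LRUList mutex, taken inside moveBack
*   } finally {
*       bin.releaseLatch();
*   }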
*
* An LRU entry is in a special state when it is removed from the LRU list and
* is being processed by the evictor. In this case the IN is latched, but there
* is a window after it is removed and before it is latched where anything can
* happen. Before processing, several checks are made to ensure that the entry
* still belongs to the IN, the IN has not been evicted, and the entry has not
* been put back on the LRUList. This last check requires synchronizing on the
* LRUList, so unfortunately we must synchronize twice on the LRU list: once to
* remove the entry, and again after latching the IN to ensure that it has not
* been put back on the LRUList by another thread.
*
* TODO:
* - Test allocation failures using an allocator that simulates random
* failures. Currently, allocation failures never happen in the real world,
* because Linux kills the process before memory is exhausted. But when we
* allow alternate allocators, failures may occur if a memory pool is filled.
*/
public class OffHeapCache implements EnvConfigObserver {
private static final int VLSN_SIZE = 8;
private static final int CHECKSUM_SIZE = 4;
private static final int MAX_UNUSED_BIN_BYTES = 100;
private static final int BIN_FLAG_DELTA = 0x1;
private static final int BIN_FLAG_CAN_MUTATE = 0x2;
private static final int BIN_FLAG_PROHIBIT_NEXT_DELTA = 0x4;
private static final int BIN_FLAG_LOGGED_FULL_VERSION = 0x8;
private static final boolean DEBUG_DOUBLE_FREE = false;
private static final int DEBUG_FREE_BLOCKS_PER_MAP = 250000; // about 0.5 G
private static final boolean DEBUG_TRACE = false;
private static final boolean DEBUG_TRACE_STACK = false;
private static final boolean DEBUG_TRACE_AND_LOG = false;
/*
* Number of LRU entries to allocate at a time, i.e., per chunk.
* The goals are:
*
* 1. Create arrays large enough to make the object overhead insignificant.
* The byte[], the smallest array, is 100KB and its object overhead (16
* bytes max) is tiny in comparison.
*
* 2. Create arrays less than 1MB in size to prevent GC issues. "Humongous"
* objects, which are expensive from a GC viewpoint, are 1MB or larger. The
* long[], the largest array, is 800KB with a 100K chunk size.
*
* 3. Create chunks small enough that we don't use a big percentage of a
* smallish heap to allocate one chunk. The chunk size is a little over
* 2MB, which is easily small enough to meet this requirement.
*
* 4. Create chunks large enough so that we don't frequently grow the chunk
* list, which requires holding the free list mutex. 100K entries per chunk
* is easily enough.
*
* 5. Create chunks small enough so that we don't hold the free list mutex
* for too long while adding all the entries in a new chunk to the free
* list. 100K may be too large in this respect, and it could be reduced if
* this is a noticeable issue. Even better, rather than add a new chunk's
* entries to the free list, treat those entries as a "free stack" and pop
* them off separately.
*/
private static final int CHUNK_SIZE = 100 * 1024;
private static final long CHUNK_MEMORY_SIZE =
MemoryBudget.OBJECT_OVERHEAD +
16 + // For four array references -- accuracy is unimportant.
MemoryBudget.longArraySize(CHUNK_SIZE) +
MemoryBudget.objectArraySize(CHUNK_SIZE) +
MemoryBudget.intArraySize(CHUNK_SIZE) * 2;
/*
* Amount that tests should add to a minimal main cache configuration,
* when an off-heap cache is used.
*
* TODO: For now this is not budgeted.
*/
public static final long MIN_MAIN_CACHE_OVERHEAD = 0;//CHUNK_MEMORY_SIZE;
private static class Chunk {
/*
* If the IN is a UIN, the memId is the block containing the BIN.
*
* If the IN is a BIN, the memId is currently unused. It may be used in
* the future for the off-heap full BIN for a BIN-delta in main.
*/
final long[] memIds;
/*
* The IN that owns this entry.
* . Is null if the entry is not used, i.e., on the free list.
* . Is a UIN if the entry is for an off-heap BIN.
* . Is a BIN if the entry is for a BIN in the main cache.
*/
final IN[] owners;
/*
* IDs of the prev/next entries in the LRU linked list. For entries on
* the free list, only the next entry is used (it is singly-linked).
*
* The prev and next entry ID are -1 to mean the end of the list.
* . If prev == -1, then entry ID == LRUList.back.
* . If next == -1, then entry ID == LRUList.front.
*
* If next == -2, the entry is not in an LRUList nor is it on the free
* list. When next == -2 and the owner is non-null, this means the
* entry has been removed from the LRU list to be processed by the
* evictor; the evictor may decide to add it back to an LRU list or
* place it on the free list.
*
* When an entry is on the free list, next is the next ID on the free
* list, and the owner is null.
*/
final int[] prev;
final int[] next;
Chunk() {
memIds = new long[CHUNK_SIZE];
owners = new IN[CHUNK_SIZE];
prev = new int[CHUNK_SIZE];
next = new int[CHUNK_SIZE];
}
}
private class LRUList {
/*
* The front field is the entry ID of the cold end, and back is the ID
* of the hot end. Both fields are -1 if the list is empty. If there is
* only one entry, both fields have the same value.
*/
private int front = -1;
private int back = -1;
private int size = 0;
void addBack(final int entry, final IN owner, final long memId) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
/*
* Must set owner before adding to LRU list, since an entry that is
* on the LRU list with a null owner would be considered as a free
* entry (by other threads).
*/
chunk.owners[chunkIdx] = owner;
chunk.memIds[chunkIdx] = memId;
synchronized (this) {
addBackInternal(entry, chunk, chunkIdx);
}
}
void addFront(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
synchronized (this) {
addFrontInternal(entry, chunk, chunkIdx);
}
}
void moveBack(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
synchronized (this) {
if (back == entry) {
return;
}
removeInternal(entry, chunk, chunkIdx);
addBackInternal(entry, chunk, chunkIdx);
}
}
void moveFront(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
synchronized (this) {
if (front == entry) {
return;
}
removeInternal(entry, chunk, chunkIdx);
addFrontInternal(entry, chunk, chunkIdx);
}
}
int removeFront() {
synchronized (this) {
int entry = front;
if (entry < 0) {
return -1;
}
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
removeInternal(entry, chunk, chunkIdx);
return entry;
}
}
void remove(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
synchronized (this) {
removeInternal(entry, chunk, chunkIdx);
}
}
private void addBackInternal(final int entry,
final Chunk chunk,
final int chunkIdx ) {
assert chunk.owners[chunkIdx] != null;
assert chunk.next[chunkIdx] == -2;
if (back < 0) {
assert back == -1;
assert front == -1;
chunk.prev[chunkIdx] = -1;
chunk.next[chunkIdx] = -1;
back = entry;
front = entry;
} else {
assert front >= 0;
final Chunk nextChunk = chunks[back / CHUNK_SIZE];
final int nextIdx = back % CHUNK_SIZE;
assert nextChunk.prev[nextIdx] < 0;
nextChunk.prev[nextIdx] = entry;
chunk.next[chunkIdx] = back;
chunk.prev[chunkIdx] = -1;
back = entry;
}
size += 1;
}
private void addFrontInternal(final int entry,
final Chunk chunk,
final int chunkIdx ) {
assert chunk.owners[chunkIdx] != null;
assert chunk.next[chunkIdx] == -2;
if (front < 0) {
assert back == -1;
assert front == -1;
chunk.prev[chunkIdx] = -1;
chunk.next[chunkIdx] = -1;
front = entry;
back = entry;
} else {
assert back >= 0;
final Chunk prevChunk = chunks[front / CHUNK_SIZE];
final int prevIdx = front % CHUNK_SIZE;
assert prevChunk.next[prevIdx] < 0;
prevChunk.next[prevIdx] = entry;
chunk.prev[chunkIdx] = front;
chunk.next[chunkIdx] = -1;
front = entry;
}
size += 1;
}
private void removeInternal(final int entry,
final Chunk chunk,
final int chunkIdx ) {
assert chunk.owners[chunkIdx] != null;
if (chunk.next[chunkIdx] == -2) {
return;
}
assert front >= 0;
assert back >= 0;
final int prev = chunk.prev[chunkIdx];
final int next = chunk.next[chunkIdx];
if (prev < 0) {
assert prev == -1;
assert back == entry;
back = next;
} else {
assert back != entry;
final Chunk prevChunk = chunks[prev / CHUNK_SIZE];
final int prevIdx = prev % CHUNK_SIZE;
assert prevChunk.next[prevIdx] == entry;
prevChunk.next[prevIdx] = next;
}
if (next < 0) {
assert next == -1;
assert front == entry;
front = prev;
} else {
assert front != entry;
final Chunk nextChunk = chunks[next / CHUNK_SIZE];
final int nextIdx = next % CHUNK_SIZE;
assert nextChunk.prev[nextIdx] == entry;
nextChunk.prev[nextIdx] = prev;
}
chunk.next[chunkIdx] = -2;
size -= 1;
}
boolean contains(final Chunk chunk, final int chunkIdx) {
synchronized (this) {
assert chunk.next[chunkIdx] >= -2;
return chunk.next[chunkIdx] != -2 &&
chunk.owners[chunkIdx] != null;
}
}
int getSize() {
return size;
}
}
private final Logger logger;
private final OffHeapAllocator allocator;
private boolean runEvictorThreads;
private int maxPoolThreads;
private final AtomicInteger activePoolThreads = new AtomicInteger(0);
private final AtomicBoolean shutdownRequested = new AtomicBoolean(false);
private final ThreadPoolExecutor evictionPool;
private int terminateMillis;
private long maxMemory;
private long memoryLimit;
private final long evictBytes;
private volatile Map<Long, Exception> freedBlocks;
private volatile Map<Long, Exception> prevFreedBlocks;
private volatile Chunk[] chunks;
private int firstFreeListEntry = -1;
private final Object addRemoveEntryMutex = new Object();
private final int numLRULists;
private final LRUList[] pri1LRUSet;
private final LRUList[] pri2LRUSet;
private int nextPri1LRUList = 0;
private int nextPri2LRUList = 0;
private final AtomicLong nAllocFailure = new AtomicLong(0);
private final AtomicLong nAllocOverflow = new AtomicLong(0);
private final AtomicLong nThreadUnavailable = new AtomicLong(0);
private final AtomicLong nCriticalNodesTargeted = new AtomicLong(0);
private final AtomicLong nNodesTargeted = new AtomicLong(0);
private final AtomicLong nNodesEvicted = new AtomicLong(0);
private final AtomicLong nDirtyNodesEvicted = new AtomicLong(0);
private final AtomicLong nNodesStripped = new AtomicLong(0);
private final AtomicLong nNodesMutated = new AtomicLong(0);
private final AtomicLong nNodesSkipped = new AtomicLong(0);
private final AtomicLong nLNsEvicted = new AtomicLong(0);
private final AtomicLong nLNsLoaded = new AtomicLong(0);
private final AtomicLong nLNsStored = new AtomicLong(0);
private final AtomicLong nBINsLoaded = new AtomicLong(0);
private final AtomicLong nBINsStored = new AtomicLong(0);
private final AtomicInteger cachedLNs = new AtomicInteger(0);
private final AtomicInteger cachedBINs = new AtomicInteger(0);
private final AtomicInteger cachedBINDeltas = new AtomicInteger(0);
private final AtomicInteger totalBlocks = new AtomicInteger(0);
private final AtomicInteger lruSize = new AtomicInteger(0);
public OffHeapCache(final EnvironmentImpl envImpl) {
logger = LoggerUtils.getLogger(getClass());
final DbConfigManager configManager = envImpl.getConfigManager();
maxMemory = configManager.getLong(
EnvironmentParams.MAX_OFF_HEAP_MEMORY);
if (maxMemory == 0) {
allocator = DummyAllocator.INSTANCE;
} else {
try {
final OffHeapAllocatorFactory factory =
new OffHeapAllocatorFactory();
allocator = factory.getDefaultAllocator();
} catch (Throwable e) {
// TODO: allow continuing without an off-heap cache?
throw new IllegalStateException(
"Unable to create default allocator for off-heap cache", e);
}
}
evictBytes = configManager.getLong(
EnvironmentParams.OFFHEAP_EVICT_BYTES);
numLRULists = configManager.getInt(
EnvironmentParams.OFFHEAP_N_LRU_LISTS);
allocator.setMaxBytes(maxMemory);
memoryLimit = maxMemory;
pri1LRUSet = new LRUList[numLRULists];
pri2LRUSet = new LRUList[numLRULists];
for (int i = 0; i < numLRULists; i += 1) {
pri1LRUSet[i] = new LRUList();
pri2LRUSet[i] = new LRUList();
}
if (DEBUG_DOUBLE_FREE) {
freedBlocks = new ConcurrentHashMap<>();
prevFreedBlocks = new ConcurrentHashMap<>();
} else {
freedBlocks = null;
prevFreedBlocks = null;
}
terminateMillis = configManager.getDuration(
EnvironmentParams.EVICTOR_TERMINATE_TIMEOUT);
final int corePoolSize = configManager.getInt(
EnvironmentParams.OFFHEAP_CORE_THREADS);
maxPoolThreads = configManager.getInt(
EnvironmentParams.OFFHEAP_MAX_THREADS);
final long keepAliveTime = configManager.getDuration(
EnvironmentParams.OFFHEAP_KEEP_ALIVE);
final boolean isShared = envImpl.getSharedCache();
evictionPool = new ThreadPoolExecutor(
corePoolSize, maxPoolThreads,
keepAliveTime, TimeUnit.MILLISECONDS,
new ArrayBlockingQueue<Runnable>(1),
new StoppableThreadFactory(
isShared ? null : envImpl, "JEOffHeapEvictor", logger),
new RejectedExecutionHandler() {
@Override
public void rejectedExecution(
Runnable r, ThreadPoolExecutor executor) {
nThreadUnavailable.incrementAndGet();
}
});
runEvictorThreads = configManager.getBoolean(
EnvironmentParams.ENV_RUN_OFFHEAP_EVICTOR);
envImpl.addConfigObserver(this);
}
@Override
public void envConfigUpdate(
final DbConfigManager configManager,
final EnvironmentMutableConfig ignore) {
terminateMillis = configManager.getDuration(
EnvironmentParams.EVICTOR_TERMINATE_TIMEOUT);
final int corePoolSize = configManager.getInt(
EnvironmentParams.OFFHEAP_CORE_THREADS);
maxPoolThreads = configManager.getInt(
EnvironmentParams.OFFHEAP_MAX_THREADS);
final long keepAliveTime = configManager.getDuration(
EnvironmentParams.OFFHEAP_KEEP_ALIVE);
evictionPool.setCorePoolSize(corePoolSize);
evictionPool.setMaximumPoolSize(maxPoolThreads);
evictionPool.setKeepAliveTime(keepAliveTime, TimeUnit.MILLISECONDS);
runEvictorThreads = configManager.getBoolean(
EnvironmentParams.ENV_RUN_OFFHEAP_EVICTOR);
final long newMaxMemory = configManager.getLong(
EnvironmentParams.MAX_OFF_HEAP_MEMORY);
if ((newMaxMemory > 0) != (maxMemory > 0)) {
// TODO detect this error earlier?
throw new IllegalArgumentException(
"Cannot change off-heap cache size between zero and non-zero");
}
maxMemory = newMaxMemory;
allocator.setMaxBytes(newMaxMemory);
memoryLimit = newMaxMemory;
}
public void requestShutdown() {
shutdownRequested.set(true);
evictionPool.shutdown();
}
public void shutdown() {
shutdownRequested.set(true);
evictionPool.shutdown();
boolean shutdownFinished = false;
try {
shutdownFinished = evictionPool.awaitTermination(
terminateMillis, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
/* We've been interrupted, just give up and end. */
} finally {
if (!shutdownFinished) {
evictionPool.shutdownNow();
}
clearCache(null);
// envImpl.getMemoryBudget().updateAdminMemoryUsage(
// 0 - (chunks.length * CHUNK_MEMORY_SIZE));
chunks = null;
}
}
public boolean isEnabled() {
return allocator != DummyAllocator.INSTANCE;
}
public long clearCache(final EnvironmentImpl matchEnv) {
/*
* Use local var because when matchEnv is non-null, other threads (for
* other envs in the shared pool) are running and may replace the
* array. However, all entries for matchEnv will remain in the local
* array.
*/
final Chunk[] myChunks = chunks;
if (myChunks == null) {
return 0;
}
long size = 0;
for (Chunk chunk : myChunks) {
for (int chunkIdx = 0; chunkIdx < CHUNK_SIZE; chunkIdx += 1) {
final IN owner = chunk.owners[chunkIdx];
if (owner == null) {
continue;
}
if (matchEnv != null && owner.getEnv() != matchEnv) {
continue;
}
owner.latchNoUpdateLRU();
try {
size += removeINFromMain(owner);
} finally {
owner.releaseLatch();
}
}
}
return size;
}
public StatGroup loadStats(StatsConfig config) {
StatGroup stats = new StatGroup(GROUP_NAME, GROUP_DESC);
new LongStat(stats, ALLOC_FAILURE, nAllocFailure.get());
new LongStat(stats, ALLOC_OVERFLOW, nAllocOverflow.get());
new LongStat(stats, THREAD_UNAVAILABLE, nThreadUnavailable.get());
new LongStat(stats, CRITICAL_NODES_TARGETED, nCriticalNodesTargeted.get());
new LongStat(stats, NODES_TARGETED, nNodesTargeted.get());
new LongStat(stats, NODES_EVICTED, nNodesEvicted.get());
new LongStat(stats, DIRTY_NODES_EVICTED, nDirtyNodesEvicted.get());
new LongStat(stats, NODES_STRIPPED, nNodesStripped.get());
new LongStat(stats, NODES_MUTATED, nNodesMutated.get());
new LongStat(stats, NODES_SKIPPED, nNodesSkipped.get());
new LongStat(stats, LNS_EVICTED, nLNsEvicted.get());
new LongStat(stats, LNS_LOADED, nLNsLoaded.get());
new LongStat(stats, LNS_STORED, nLNsStored.get());
new LongStat(stats, BINS_LOADED, nBINsLoaded.get());
new LongStat(stats, BINS_STORED, nBINsStored.get());
new IntStat(stats, CACHED_LNS, cachedLNs.get());
new IntStat(stats, CACHED_BINS, cachedBINs.get());
new IntStat(stats, CACHED_BIN_DELTAS, cachedBINDeltas.get());
new LongStat(stats, TOTAL_BYTES, allocator.getUsedBytes());
new IntStat(stats, TOTAL_BLOCKS, totalBlocks.get());
new IntStat(stats, LRU_SIZE, lruSize.get());
if (config.getClear()) {
nAllocFailure.set(0);
nAllocOverflow.set(0);
nThreadUnavailable.set(0);
nCriticalNodesTargeted.set(0);
nNodesTargeted.set(0);
nNodesEvicted.set(0);
nDirtyNodesEvicted.set(0);
nNodesStripped.set(0);
nNodesMutated.set(0);
nNodesSkipped.set(0);
nLNsEvicted.set(0);
nLNsLoaded.set(0);
nLNsStored.set(0);
nBINsLoaded.set(0);
nBINsStored.set(0);
}
return stats;
}
public long getMaxMemory() {
return maxMemory;
}
public long getUsedMemory() {
return allocator.getUsedBytes();
}
/**
* Forces allocation of the first chunk of entries. Used by tests that need
* to more precisely control cache behavior.
*/
public void preallocateLRUEntries() {
if (chunks == null) {
freeEntry(allocateEntry());
}
}
public OffHeapAllocator getAllocator() {
return allocator;
}
private void debug(final EnvironmentImpl envImpl, String msg) {
assert DEBUG_TRACE;
if (DEBUG_TRACE_STACK) {
msg += " " + LoggerUtils.getStackTrace();
}
if (DEBUG_TRACE_AND_LOG) {
LoggerUtils.traceAndLog(logger, envImpl, Level.INFO, msg);
} else {
LoggerUtils.logMsg(logger, envImpl, Level.INFO, msg);
}
}
private int addBack(final boolean pri2, IN owner, long memId) {
assert owner.isLatchExclusiveOwner();
final int entry = allocateEntry();
final int lruIdx = entry % numLRULists;
final LRUList lru =
pri2 ? pri2LRUSet[lruIdx] : pri1LRUSet[lruIdx];
lru.addBack(entry, owner, memId);
return entry;
}
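/**
* Moves the entry to the hot end of its LRU list and returns the entry. The
* list is selected by (entry % numLRULists) and by the pri2 flag.
*/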
public int moveBack(final int entry, final boolean pri2) {
final int lruIdx = entry % numLRULists;
final LRUList lru =
pri2 ? pri2LRUSet[lruIdx] : pri1LRUSet[lruIdx];
lru.moveBack(entry);
return entry;
}
private int moveFront(final int entry, final boolean pri2) {
final int lruIdx = entry % numLRULists;
final LRUList lru =
pri2 ? pri2LRUSet[lruIdx] : pri1LRUSet[lruIdx];
lru.moveFront(entry);
return entry;
}
private void remove(final int entry, final boolean pri2) {
final int lruIdx = entry % numLRULists;
final LRUList lru =
pri2 ? pri2LRUSet[lruIdx] : pri1LRUSet[lruIdx];
lru.remove(entry);
freeEntry(entry);
}
/**
* Takes an entry from the free list. If the free list is empty, allocates
* a new chunk and adds its entries to the free list.
*/
private int allocateEntry() {
synchronized (addRemoveEntryMutex) {
if (firstFreeListEntry >= 0) {
final int entry = firstFreeListEntry;
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
firstFreeListEntry = chunk.next[chunkIdx];
chunk.next[chunkIdx] = -2;
lruSize.incrementAndGet();
return entry;
}
final Chunk newChunk = new Chunk();
final int[] next = newChunk.next;
final int nOldChunks = (chunks != null) ? chunks.length : 0;
/* Entry 0 in the new chunk will be returned. */
int nextFree = nOldChunks * CHUNK_SIZE;
final int entry = nextFree++;
next[0] = -2;
/* Entry 1 is the tail of the free list. */
next[1] = -1;
/*
* Entry 2 and above are added to the free list.
*
* This loop needs to be as fast as possible, which is why we're
* using local vars for next and nextFree.
*
* In the loop, nextFree starts out as entry 1 (tail of free
* list) and ends up as the last free entry (head of free list).
*/
for (int i = 2; i < CHUNK_SIZE; i += 1) {
next[i] = nextFree++;
}
/* The last entry is the head of the free list. */
firstFreeListEntry = nextFree;
final Chunk[] newChunks = new Chunk[nOldChunks + 1];
if (nOldChunks > 0) {
System.arraycopy(chunks, 0, newChunks, 0, nOldChunks);
}
newChunks[nOldChunks] = newChunk;
/* Assign to volatile chunks field as the very last step. */
chunks = newChunks;
lruSize.incrementAndGet();
// envImpl.getMemoryBudget().updateAdminMemoryUsage(
// CHUNK_MEMORY_SIZE);
return entry;
}
}
/**
* Removes the entry from its LRU and adds it to the free list.
*/
private void freeEntry(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
synchronized (addRemoveEntryMutex) {
if (chunk.owners[chunkIdx] == null) {
return; // Already on free list
}
chunk.owners[chunkIdx] = null;
chunk.next[chunkIdx] = firstFreeListEntry;
firstFreeListEntry = entry;
lruSize.decrementAndGet();
}
}
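/**
* Returns the off-heap memory ID stored in the entry's chunk slot. The
* chunks array can be indexed without locking (it is copy-on-write); the
* memIds slot itself is protected by the owning IN's latch, as described in
* the class comment.
*/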
public long getMemId(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
return chunk.memIds[chunkIdx];
}
private IN getOwner(final int entry) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
return chunk.owners[chunkIdx];
}
public void setOwner(final int entry, final IN owner) {
assert owner.isLatchExclusiveOwner();
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
assert chunk.owners[chunkIdx] != null;
assert chunk.owners[chunkIdx].isLatchExclusiveOwner();
chunk.owners[chunkIdx] = owner;
}
private void setOwnerAndMemId(final int entry,
final IN owner,
final long memId) {
assert owner.isLatchExclusiveOwner();
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
assert chunk.owners[chunkIdx] != null;
assert chunk.owners[chunkIdx].isLatchExclusiveOwner();
chunk.owners[chunkIdx] = owner;
chunk.memIds[chunkIdx] = memId;
}
/**
* Called before eviction of an LN from main cache to provide an
* opportunity to store the LN off-heap.
*/
public boolean storeEvictedLN(final BIN bin,
final int index,
final LN ln) {
assert !ln.isDirty();
assert bin.isLatchExclusiveOwner();
assert bin.getInListResident();
final DatabaseImpl dbImpl = bin.getDatabase();
long memId = bin.getOffHeapLNId(index);
if (memId != 0) {
assert bin.getOffHeapLruId() >= 0;
/*
* If already stored off-heap, make the entry hot when
* CacheMode.UNCHANGED does not apply (getFetchedCold is false).
*/
if (!bin.getFetchedCold()) {
moveBack(bin.getOffHeapLruId(), false);
}
if (DEBUG_TRACE) {
debug(
bin.getEnv(),
"Evicted LN already store LSN=" +
DbLsn.getNoFormatString(bin.getLsn(index)));
}
return true;
}
/*
* Do not store off-heap:
* - When CacheMode.UNCHANGED applies (getFetchedCold is true). This
* is when the node was originally fetched from disk into main.
* - Deleted LNs are no longer needed.
* - For embedded LNs and dup DBs, there is no separate LN.
* - Off-heap caching for internal DBs is not currently supported.
*/
if (ln.getFetchedCold() ||
ln.isDeleted() ||
bin.isEmbeddedLN(index) ||
dbImpl.getSortedDuplicates() ||
dbImpl.isDeferredWriteMode() || // TODO remove
dbImpl.getDbType().isInternal()) {
return false;
}
memId = serializeLN(dbImpl.getEnv(), ln);
if (memId == 0) {
return false;
}
bin.setOffHeapLNId(index, memId);
/* Add to LRU at hot end, or make hot if already in LRU. */
int entry = bin.getOffHeapLruId();
if (entry < 0) {
entry = addBack(false, bin, 0);
bin.setOffHeapLruId(entry);
} else {
moveBack(entry, false);
}
if (DEBUG_TRACE) {
debug(
bin.getEnv(),
"Stored evicted LN LSN=" +
DbLsn.getNoFormatString(bin.getLsn(index)));
}
return true;
}
/**
* Called when an LN has been fetched from disk and should be stored
* off-heap.
*/
public boolean storePreloadedLN(final BIN bin,
final int index,
final LN ln) {
final DatabaseImpl dbImpl = bin.getDatabase();
assert !ln.isDirty();
assert !ln.isDeleted();
assert bin.isLatchExclusiveOwner();
assert !bin.isEmbeddedLN(index);
assert bin.getTarget(index) == null;
assert !dbImpl.getSortedDuplicates();
assert !dbImpl.isDeferredWriteMode(); // TODO remove
assert !dbImpl.getDbType().isInternal();
if (bin.getOffHeapLNId(index) != 0) {
assert bin.getInListResident();
return true;
}
final long memId = serializeLN(dbImpl.getEnv(), ln);
if (memId == 0) {
return false;
}
bin.setOffHeapLNId(index, memId);
if (!bin.getInListResident()) {
/* Preloading into a temporary BIN, not in the Btree. */
return true;
}
/* Add to LRU at hot end, or make hot if already in LRU. */
int entry = bin.getOffHeapLruId();
if (entry < 0) {
entry = addBack(false, bin, 0);
bin.setOffHeapLruId(entry);
} else {
moveBack(entry, false);
}
return true;
}
public boolean ensureOffHeapLNsInLRU(final BIN bin) {
assert bin.isLatchExclusiveOwner();
if (bin.getOffHeapLruId() >= 0) {
return true;
}
if (!bin.hasOffHeapLNs()) {
return false;
}
final int entry = addBack(false, bin, 0);
bin.setOffHeapLruId(entry);
return true;
}
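/**
* Materializes the off-heap LN for the given BIN slot, or returns null if no
* LN is stored off-heap. Depending on the CacheMode, the off-heap copy is
* either left in place (when the LN will be evicted from main again) or
* freed (when the LN will remain in main), per the "Nodes in both caches at
* once" rules in the class comment.
*/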
public LN loadLN(final BIN bin,
final int index,
final CacheMode cacheMode) {
assert bin.isLatchExclusiveOwner();
final long memId = bin.getOffHeapLNId(index);
if (memId == 0) {
return null;
}
final LN ln = materializeLN(bin.getEnv(), memId);
switch (cacheMode) {
case UNCHANGED:
case MAKE_COLD:
/* Will be evicted from main. Leave off-heap. */
break;
case EVICT_LN:
case EVICT_BIN:
/* Will be evicted from main. Leave off-heap and make hot. */
assert bin.getOffHeapLruId() >= 0;
moveBack(bin.getOffHeapLruId(), false);
break;
case DEFAULT:
case KEEP_HOT:
/* Will remain in main. Remove from off-heap. */
bin.setOffHeapLNId(index, 0);
freeLN(memId);
break;
default:
assert false;
}
if (DEBUG_TRACE) {
debug(
bin.getEnv(),
"Loaded LN LSN=" +
DbLsn.getNoFormatString(bin.getLsn(index)));
}
return ln;
}
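/**
* Called when an LN that is resident in main cache is also found off-heap.
* If the CacheMode implies the LN will remain in main, the redundant
* off-heap copy is freed; otherwise it is left in place. See the "Nodes in
* both caches at once" section of the class comment.
*/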
public void freeRedundantLN(final BIN bin,
final int index,
final LN ln,
final CacheMode cacheMode) {
assert bin.isLatchExclusiveOwner();
final long memId = bin.getOffHeapLNId(index);
if (memId == 0) {
return;
}
switch (cacheMode) {
case UNCHANGED:
case MAKE_COLD:
if (ln.getFetchedCold()) {
/* Will be evicted from main. Leave off-heap. */
return;
}
/* Will remain in main. Remove from off-heap. */
break;
case EVICT_BIN:
case EVICT_LN:
/* Will be evicted from main. Leave off-heap. */
return;
case DEFAULT:
case KEEP_HOT:
/* Will remain in main. Remove from off-heap. */
break;
default:
assert false;
}
bin.setOffHeapLNId(index, 0);
freeLN(memId);
}
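/**
* Returns the VLSN sequence cached in the off-heap LN block for the given
* slot, or NULL_VLSN_SEQUENCE if VLSN caching is disabled or the LN is not
* off-heap. The VLSN occupies the first 8 bytes of the serialized LN block.
*/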
public long loadVLSN(final BIN bin, final int index) {
if (!bin.getEnv().getCacheVLSN()) {
return VLSN.NULL_VLSN_SEQUENCE;
}
final long memId = bin.getOffHeapLNId(index);
if (memId == 0) {
return VLSN.NULL_VLSN_SEQUENCE;
}
return getLong(memId, 0, new byte[8]);
}
public int freeLN(final BIN bin, final int index) {
assert bin.isLatchExclusiveOwner();
final long memId = bin.getOffHeapLNId(index);
if (memId == 0) {
return 0;
}
/*
* Since the LN was off-heap, set fetched-cold to false. Otherwise
* the fetched-cold flag will prevent the LN from being stored
* off-heap when it is evicted later.
*/
final LN ln = (LN) bin.getTarget(index);
if (ln != null) {
ln.setFetchedCold(false);
}
bin.setOffHeapLNId(index, 0);
return freeLN(memId);
}
private int freeLN(final long memId) {
cachedLNs.decrementAndGet();
return freeMemory(memId);
}
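/**
* Serializes an LN into a newly allocated off-heap block, or returns 0 if
* allocation fails. The block layout is:
*   [VLSN (8 bytes, if VLSN caching is enabled)]
*   [Adler32 checksum of the LN data (4 bytes, if checksums are enabled)]
*   [LN data]
*/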
private long serializeLN(final EnvironmentImpl envImpl, final LN ln) {
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
final int vlsnSize = envImpl.getCacheVLSN() ? VLSN_SIZE : 0;
final int lnDataOffset = vlsnSize + checksumSize;
/*
* We make 3 calls to allocator.copy (one explicit and two via putLong
* and putInt) rather than just one because:
* - This avoids an extra copy and buffer allocation for the LN data.
* - The LN data is potentially large.
* - The checksum is normally off in production, so there is at most
* one extra allocator.copy for the VLSN.
*/
final byte[] data = ln.getData();
assert data != null;
final long memId = allocateMemory(envImpl, lnDataOffset + data.length);
if (memId == 0) {
return 0;
}
final byte[] tempBuf =
(vlsnSize > 0 || useChecksums) ? new byte[8] : null;
if (vlsnSize > 0) {
putLong(ln.getVLSNSequence(), memId, 0, tempBuf);
}
if (useChecksums) {
final Checksum checksum = Adler32.makeChecksum();
checksum.update(data, 0, data.length);
final int checksumValue = (int) checksum.getValue();
putInt(checksumValue, memId, vlsnSize, tempBuf);
}
allocator.copy(data, 0, memId, lnDataOffset, data.length);
nLNsStored.incrementAndGet();
cachedLNs.incrementAndGet();
return memId;
}
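/**
* Rebuilds an LN from its off-heap block, verifying the Adler32 checksum
* when checksums are enabled. A stored checksum of zero is treated as
* absent and is not verified.
*/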
private LN materializeLN(final EnvironmentImpl envImpl,
final long memId) {
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
final int vlsnSize = envImpl.getCacheVLSN() ? VLSN_SIZE : 0;
final int lnDataOffset = vlsnSize + checksumSize;
final byte[] data = new byte[allocator.size(memId) - lnDataOffset];
allocator.copy(memId, lnDataOffset, data, 0, data.length);
final byte[] tempBuf =
(vlsnSize > 0 || useChecksums) ? new byte[8] : null;
if (useChecksums) {
final int storedChecksum = getInt(memId, vlsnSize, tempBuf);
if (storedChecksum != 0) {
final Checksum checksum = Adler32.makeChecksum();
checksum.update(data, 0, data.length);
final int checksumValue = (int) checksum.getValue();
if (storedChecksum != checksumValue) {
throw unexpectedState(
envImpl,
"Off-heap cache checksum error. Expected " +
storedChecksum + " but got " + checksumValue);
}
}
}
nLNsLoaded.incrementAndGet();
final LN ln = LN.makeLN(envImpl, data);
ln.clearDirty(); // New LNs are initially dirty.
if (vlsnSize > 0) {
ln.setVLSNSequence(getLong(memId, 0, tempBuf));
}
return ln;
}
/**
* Called before eviction of a BIN from main cache to provide an
* opportunity to store the BIN off-heap.
*
* removeINFromMain is called after this method by the main evictor. It
* is removeINFromMain that removes the main BIN's off-heap LRU entry, if
* it has one. The bin and parent latches are held across the calls to
* storeEvictedBIN and removeINFromMain.
*
* removeINFromMain will also free any off-heap LN IDs in the main BIN,
* and therefore this method must clear those IDs in the main BIN. When
* the BIN is stored off-heap by this method, the LN IDs will be stored
* along with the off-heap BIN.
*/
public boolean storeEvictedBIN(final BIN bin,
final IN parent,
final int index) {
assert bin.isLatchExclusiveOwner();
assert bin.getInListResident();
assert parent.isLatchExclusiveOwner();
assert parent.getInListResident();
assert bin == parent.getTarget(index);
assert parent.getOffHeapBINId(index) < 0;
final DatabaseImpl dbImpl = bin.getDatabase();
/*
* Do not store off-heap:
* - When CacheMode.UNCHANGED applies, the BIN was not loaded from
* off-heap, and the BIN is not dirty.
* - Off-heap caching for internal DBs is not currently supported.
*/
if ((bin.getFetchedCold() &&
!bin.getFetchedColdOffHeap() &&
!bin.getDirty()) ||
dbImpl.isDeferredWriteMode() || // TODO remove
dbImpl.getDbType().isInternal()) {
return false;
}
/* Serialize the BIN and add it to the off-heap LRU. */
final long memId = serializeBIN(bin, bin.isBINDelta());
if (memId == 0) {
return false;
}
/*
* Reuse LRU entry if one exists for the BIN, in order not to change
* the effective LRU position of its off-heap LNs. When off-heap LNs
* are present, we want to preserve the off-heap LRU position to allow
* the LNs to be stripped sooner.
*/
int entry = bin.getOffHeapLruId();
if (entry >= 0) {
setOwnerAndMemId(entry, parent, memId);
bin.clearOffHeapLNIds();
bin.setOffHeapLruId(-1);
} else {
entry = addBack(false /*pri2*/, parent, memId);
}
parent.setOffHeapBINId(index, entry, false /*pri2*/, bin.getDirty());
if (DEBUG_TRACE) {
debug(
bin.getEnv(),
"Stored BIN LSN=" +
DbLsn.getNoFormatString(parent.getLsn(index)) +
" Node=" + bin.getNodeId() +
" dirty=" + bin.getDirty());
}
return true;
}
/**
* Called when a BIN has been fetched from disk and should be stored
* off-heap.
*/
public boolean storePreloadedBIN(final BIN bin,
final IN parent,
final int index) {
assert bin != null;
assert parent.isLatchExclusiveOwner();
assert parent.getInListResident();
assert parent.getTarget(index) == null;
final DatabaseImpl dbImpl = bin.getDatabase();
assert !dbImpl.isDeferredWriteMode(); // TODO remove
assert !dbImpl.getDbType().isInternal();
/* Pass non-null 'bin' so that off-heap LNs are not freed. */
freeBIN(bin, parent, index);
final long memId = serializeBIN(bin, bin.isBINDelta());
if (memId == 0) {
return false;
}
final int entry = addBack(false /*pri2*/, parent, memId);
parent.setOffHeapBINId(index, entry, false /*pri2*/, bin.getDirty());
return true;
}
/**
* Called before eviction of a level 2 IN from main cache. Any off-heap
* BIN children are first logged, if dirty, and then discarded.
*
* @return true if all BINs could be discarded, or false if a dirty BIN
* could not be logged due to a read-only env or disk limit violation.
*/
boolean flushAndDiscardBINChildren(final IN in,
final boolean backgroundIO) {
assert in.isLatchExclusiveOwner();
assert in.getInListResident();
assert in.getNormalizedLevel() == 2;
if (!in.hasOffHeapBINIds()) {
return true;
}
boolean allDiscarded = true;
for (int i = 0; i < in.getNEntries(); i += 1) {
final int entry = in.getOffHeapBINId(i);
if (entry < 0) {
continue;
}
if (flushAndDiscardBIN(
entry, in.isOffHeapBINPri2(i), in.isOffHeapBINDirty(i),
getMemId(entry), in, i, backgroundIO, true /*freeLNs*/) == 0) {
allDiscarded = false;
}
}
return allDiscarded;
}
/**
* Called:
* - after eviction of an IN from main cache, and in that case
* storeEvictedBIN was called and the eviction was completed.
* - when an IN is removed from the main cache for another reason,
* such as a reverse split or Database removal.
* - for all INs in an Environment being removed from the shared cache.
*/
public long removeINFromMain(final IN in) {
assert in.isLatchExclusiveOwner();
final int level = in.getNormalizedLevel();
if (level > 2) {
return 0;
}
if (level == 2) {
if (!in.hasOffHeapBINIds()) {
return 0;
}
long size = 0;
for (int i = 0; i < in.getNEntries(); i += 1) {
final BIN bin = (BIN) in.getTarget(i);
if (bin != null) {
bin.latchNoUpdateLRU();
}
try {
size += freeBIN(bin, in, i);
} finally {
if (bin != null) {
bin.releaseLatch();
}
}
}
return size;
}
assert level == 1 && in.isBIN();
final BIN bin = (BIN) in;
final int entry = bin.getOffHeapLruId();
if (entry < 0) {
assert !bin.hasOffHeapLNs();
return 0;
}
long size = 0;
if (bin.hasOffHeapLNs()) {
for (int i = 0; i < bin.getNEntries(); i += 1) {
size += freeLN(bin, i);
}
}
bin.setOffHeapLruId(-1);
remove(entry, false);
return size;
}
public BIN loadBIN(final EnvironmentImpl envImpl, final int entry) {
assert entry >= 0;
return materializeBIN(envImpl, getMemBytes(getMemId(entry)));
}
/**
* Loads a BIN for the given entry, if its last logged LSN is the given
* LSN. Can be used to store an entry for a BIN (the off-heap BIN ID)
* without holding its parent IN latch, and later find out whether that
* entry still refers to the same BIN. If the BIN was split, the LSN will
* have changed and null is returned. If the BIN is no longer off-heap, or
* was moved off-heap and back on, null is also returned.
*
* If the BIN is redundantly resident in the main and off-heap caches, the
* main cache "live" version is returned. Otherwise the BIN is deserialized
* from the off-heap version and is not "live". When non-null is returned,
* the returned BIN is latched.
*/
public BIN loadBINIfLsnMatches(final EnvironmentImpl envImpl,
final int entry,
final long lsn) {
final Pair<IN, Integer> result =
findBINIfLsnMatches(envImpl, entry, lsn);
if (result == null) {
return null;
}
final IN in = result.first();
final int index = result.second();
try {
BIN bin = (BIN) in.getTarget(index);
if (bin != null) {
bin.latchNoUpdateLRU();
return bin;
}
final long memId = getMemId(entry);
bin = materializeBIN(envImpl, getMemBytes(memId));
bin.latchNoUpdateLRU(in.getDatabase());
return bin;
} finally {
in.releaseLatch();
}
}
public void evictBINIfLsnMatch(final EnvironmentImpl envImpl,
final int entry,
final long lsn) {
final Pair<IN, Integer> result =
findBINIfLsnMatches(envImpl, entry, lsn);
if (result == null) {
return;
}
final IN in = result.first();
final int index = result.second();
try {
assert in.getTarget(index) == null;
freeBIN(null, in, index);
} finally {
in.releaseLatch();
}
}
/**
* If non-null is returned, the returned IN will be EX latched.
*/
private Pair<IN, Integer> findBINIfLsnMatches(
final EnvironmentImpl envImpl,
final int entry,
final long lsn) {
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
final IN in = chunk.owners[chunkIdx];
if (in == null) {
return null;
}
/*
* The validation process here is very similar to in evictOne. See the
* comments in that method.
*/
in.latchNoUpdateLRU();
if (in != chunk.owners[chunkIdx] ||
!in.getInListResident() ||
in.getEnv() != envImpl ||
in.isBIN()) {
in.releaseLatch();
return null;
}
int index = -1;
for (int i = 0; i < in.getNEntries(); i += 1) {
if (in.getOffHeapBINId(i) == entry) {
index = i;
break;
}
}
if (index < 0) {
in.releaseLatch();
return null;
}
if (in.getLsn(index) != lsn) {
in.releaseLatch();
return null;
}
return new Pair<>(in, index);
}
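/**
* Returns the serialized bytes of the off-heap BIN for the given parent
* slot, or null if the slot has no off-heap BIN.
*/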
public byte[] getBINBytes(final IN parent, final int index) {
assert parent.isLatchOwner();
final int entry = parent.getOffHeapBINId(index);
if (entry < 0) {
return null;
}
assert parent == getOwner(entry);
return getMemBytes(getMemId(entry));
}
/**
* Called when a BIN's bytes were obtained holding a shared latch, and then
* the latch was released and acquired again. We need to determine whether
* the BIN was changed and moved off-heap again, while unlatched.
*
* Currently we just get the bytes again and compare.
*
* Possible optimization: Maintain a generation count in the serialized
* BIN, whose value comes from a global counter that is incremented
* whenever a BIN is serialized. But would the range of such a counter be
* large enough to guarantee that wrapping won't be a problem? Certainly
* the odds are low, but how can we guarantee it won't happen? Another
* approach is to maintain the counter in the BIN in main cache, so it is a
* per BIN value.
*/
public boolean haveBINBytesChanged(final IN parent,
final int index,
final byte[] bytes) {
assert parent.isLatchOwner();
return !Arrays.equals(bytes, getBINBytes(parent, index));
}
public void postBINLoad(final IN parent, final int index, final BIN bin) {
assert bin.isLatchExclusiveOwner();
assert parent.isLatchExclusiveOwner();
assert parent.getInListResident();
assert parent.getTarget(index) == null;
final int entry = parent.getOffHeapBINId(index);
assert entry >= 0;
assert parent == getOwner(entry);
bin.setDirty(parent.isOffHeapBINDirty(index));
final long freed = freeBIN(bin, parent, index);
assert freed > 0;
ensureOffHeapLNsInLRU(bin);
if (DEBUG_TRACE) {
debug(
parent.getEnv(),
"Loaded BIN LSN=" +
DbLsn.getNoFormatString(parent.getLsn(index)) +
" Node=" + bin.getNodeId() +
" dirty=" + bin.getDirty());
}
}
public long freeBIN(final BIN bin, final IN parent, final int index) {
assert parent.isLatchExclusiveOwner();
assert bin == null || bin.isLatchExclusiveOwner();
final int entry = parent.getOffHeapBINId(index);
if (entry < 0) {
return 0;
}
assert parent == getOwner(entry);
final boolean pri2 = parent.isOffHeapBINPri2(index);
final long memId = getMemId(entry);
parent.clearOffHeapBINId(index);
remove(entry, pri2);
/*
* Only free the LNs referenced by the off-heap BIN if the BIN is not
* resident in main (bin == null). When the off-heap BIN is stale, its
* LN Ids are also stale.
*/
return freeBIN(parent.getEnv(), memId, bin == null /*freeLNs*/);
}
private long freeBIN(final EnvironmentImpl envImpl,
final long memId,
final boolean freeLNs) {
long size = 0;
final int flags;
if (freeLNs) {
final ParsedBIN pb = parseBINBytes(
envImpl, getMemBytes(memId),
false /*partialBuf*/, true /*parseLNIds*/);
if (pb.lnMemIds != null) {
for (final long lnMemId : pb.lnMemIds) {
if (lnMemId == 0) {
continue;
}
size += freeLN(lnMemId);
}
}
flags = pb.flags;
} else {
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
flags = getByte(memId, checksumSize, new byte[1]);
}
cachedBINs.decrementAndGet();
if ((flags & BIN_FLAG_DELTA) != 0) {
cachedBINDeltas.decrementAndGet();
}
return size + freeMemory(memId);
}
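/**
* Serializes a BIN (or BIN-delta) into a newly allocated off-heap block, or
* returns 0 if allocation fails. The block layout is:
*   [checksum (4 bytes, if enabled)]
*   [flags (1 byte)]
*   [lastFullLsn (8 bytes)] [lastDeltaLsn (8 bytes)]
*   [minExpiration (4 bytes)] [lnIdSize (2 bytes)]
*   [packed off-heap LN memIds (lnIdSize bytes)]
*   [serialized BIN]
*/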
long serializeBIN(final BIN bin, final boolean asDelta) {
assert !bin.hasCachedChildren();
assert !(bin.isBINDelta() && !asDelta);
final EnvironmentImpl envImpl = bin.getEnv();
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
final boolean canMutate = !asDelta && bin.canMutateToBINDelta();
int flags = 0;
if (asDelta) {
flags |= BIN_FLAG_DELTA;
}
if (canMutate) {
flags |= BIN_FLAG_CAN_MUTATE;
}
if (bin.getProhibitNextDelta()) {
flags |= BIN_FLAG_PROHIBIT_NEXT_DELTA;
}
final short lnIdSize = getPackedLnMemIdSize(bin);
/*
* If there are any LNs, then we should not be mutating from a full BIN
* to a BIN delta -- this isn't handled and should not happen.
*/
assert !(asDelta && !bin.isBINDelta() && lnIdSize != 0);
final int memSize =
checksumSize + 1 + 8 + 8 + 4 + 2 +
lnIdSize + bin.getLogSize(asDelta);
final long memId = allocateMemory(envImpl, memSize);
if (memId == 0) {
return 0;
}
final byte[] buf = new byte[memSize];
int bufOffset = checksumSize;
buf[bufOffset] = (byte) flags;
bufOffset += 1;
putLong(bin.getLastFullLsn(), buf, bufOffset);
bufOffset += 8;
putLong(bin.getLastDeltaLsn(), buf, bufOffset);
bufOffset += 8;
putInt(getMinExpiration(bin), buf, bufOffset);
bufOffset += 4;
putShort(lnIdSize, buf, bufOffset);
bufOffset += 2;
if (lnIdSize > 0) {
packLnMemIds(bin, buf, bufOffset);
bufOffset += lnIdSize;
}
final ByteBuffer byteBuf =
ByteBuffer.wrap(buf, bufOffset, buf.length - bufOffset);
bin.serialize(byteBuf, asDelta, false /*clearDirtyBits*/);
if (useChecksums) {
final Checksum checksum = Adler32.makeChecksum();
checksum.update(buf, checksumSize, buf.length - checksumSize);
final int checksumValue = (int) checksum.getValue();
putInt(checksumValue, memId, 0, buf);
}
allocator.copy(buf, 0, memId, 0, buf.length);
nBINsStored.incrementAndGet();
cachedBINs.incrementAndGet();
if (asDelta) {
cachedBINDeltas.incrementAndGet();
}
return memId;
}
public BIN materializeBIN(final EnvironmentImpl envImpl,
final byte[] buf) {
final ParsedBIN pb = parseBINBytes(
envImpl, buf, false /*partialBuf*/, true /*parseLNIds*/);
final BIN bin = materializeBIN(pb, (pb.flags & BIN_FLAG_DELTA) != 0);
nBINsLoaded.incrementAndGet();
return bin;
}
private BIN materializeBIN(final ParsedBIN pb, final boolean asDelta) {
final BIN bin = new BIN();
bin.materialize(
pb.binBytes, LogEntryType.LOG_VERSION, asDelta /*deltasOnly*/,
(pb.flags & BIN_FLAG_LOGGED_FULL_VERSION) != 0 /*clearDirtyBits*/);
bin.setLastFullLsn(pb.lastFullLsn);
bin.setLastDeltaLsn(pb.lastDeltaLsn);
bin.setProhibitNextDelta(
(pb.flags & BIN_FLAG_PROHIBIT_NEXT_DELTA) != 0);
if (pb.lnMemIds != null) {
for (int i = 0; i < pb.lnMemIds.length; i += 1) {
final long lnMemId = pb.lnMemIds[i];
if (lnMemId == 0) {
continue;
}
bin.setOffHeapLNId(i, lnMemId);
}
}
return bin;
}
public INLogEntry<BIN> createBINLogEntryForCheckpoint(final IN parent,
final int index) {
final int entry = parent.getOffHeapBINId(index);
if (entry < 0 || !parent.isOffHeapBINDirty(index)) {
return null;
}
assert parent == getOwner(entry);
final long memId = getMemId(entry);
return createBINLogEntry(
memId, entry, parent, true /*preserveBINInCache*/);
}
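/**
* Called after a dirty off-heap BIN has been logged, e.g. by the
* checkpointer. Updates the off-heap block in place: invalidates the
* checksum, updates the flags and the last full/delta LSN, moves the entry
* from the priority 2 list (if it was there) to the hot end of a priority 1
* list, and clears the dirty flag in the parent slot.
*/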
public void postBINLog(final IN parent,
final int index,
final INLogEntry<BIN> logEntry,
final long newLsn) {
assert parent.isLatchExclusiveOwner();
assert parent.getInListResident();
final EnvironmentImpl envImpl = parent.getEnv();
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
final boolean isDelta = logEntry.isBINDelta();
final int entry = parent.getOffHeapBINId(index);
assert entry >= 0;
assert parent.isOffHeapBINDirty(index);
final BIN bin =
logEntry.isPreSerialized() ? null : logEntry.getMainItem();
/*
* Update checksum, flags and last full/delta LSNs.
*/
final long memId = getMemId(entry);
final byte[] buf = new byte[checksumSize + 1 + 8 + 8];
allocator.copy(memId, 0, buf, 0, buf.length);
int bufOffset = 0;
/* The checksum is now invalid. */
if (useChecksums) {
putInt(0, buf, 0);
bufOffset += checksumSize;
}
/* Update flags. */
int flags = buf[bufOffset];
if (!isDelta) {
flags |= BIN_FLAG_LOGGED_FULL_VERSION;
}
flags &= ~BIN_FLAG_PROHIBIT_NEXT_DELTA;
buf[bufOffset] = (byte) flags;
bufOffset += 1;
/* Update lastFullLsn. */
if (!isDelta) {
putLong(newLsn, buf, bufOffset);
}
bufOffset += 8;
/* Update lastDeltaLsn. */
putLong(isDelta ? newLsn : DbLsn.NULL_LSN, buf, bufOffset);
bufOffset += 8;
assert bufOffset == buf.length;
allocator.copy(buf, 0, memId, 0, buf.length);
/* Move from pri2 LRU list to back of pri1 LRU list. */
if (parent.isOffHeapBINPri2(index)) {
pri2LRUSet[entry % numLRULists].remove(entry);
moveBack(entry, false /*pri2*/);
}
parent.setOffHeapBINId(
index, entry, false /*pri2*/, false /*dirty*/);
if (bin != null) {
bin.releaseLatch();
}
}
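/**
* Creates a log entry from the serialized off-heap BIN. The
* pre-serialized bytes are used directly when possible; otherwise the
* BIN is materialized so it can be logged as a delta or have its expired
* slots compressed first.
*/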
private INLogEntry<BIN> createBINLogEntry(
final long memId,
final int entry,
final IN parent,
final boolean preserveBINInCache) {
final EnvironmentImpl envImpl = parent.getEnv();
final byte[] buf = getMemBytes(memId);
final ParsedBIN pb = parseBINBytes(
envImpl, buf, false /*partialBuf*/, false /*parseLNIds*/);
final boolean isDelta = (pb.flags & BIN_FLAG_DELTA) != 0;
final boolean canMutateToDelta = (pb.flags & BIN_FLAG_CAN_MUTATE) != 0;
/*
* If the BIN is a delta, we must log a delta. In that case we cannot
* compress expired slots, and we can log the pre-serialized BIN.
*/
if (isDelta) {
return new BINDeltaLogEntry(
pb.binBytes, pb.lastFullLsn, pb.lastDeltaLsn,
LogEntryType.LOG_BIN_DELTA, parent);
}
/*
* For a full BIN, normally we log a delta iff it can be mutated to a
* delta. However, if any slots are expired, we attempt to compress
* them and then determine whether to log a delta.
*
* This mimics the logic in IN.logInternal, for BINs in main cache.
*
* TODO: Use materialized BIN in the log entry when any slot has an
* expiration time, even if none are currently expired. Such BINs must
* be materialized during logging for expiration tracking anyway by
* INLogEntry.getBINWithExpiration. Then the getBINWithExpiration and
* BIN.mayHaveExpirationValues methods will no longer be needed.
*/
final boolean hasExpiredSlot =
envImpl.isExpired(pb.minExpiration, true /*hours*/);
/*
* If we do not need to log a full BIN as a delta, or to compress its
* expired slots, then we can log the pre-serialized BIN.
*/
if (!hasExpiredSlot && !canMutateToDelta) {
return new INLogEntry<>(
pb.binBytes, pb.lastFullLsn, pb.lastDeltaLsn,
LogEntryType.LOG_BIN, parent);
}
/*
* We must materialize the full BIN in order to log it as a delta or to
* compress its expired slots.
*/
final BIN bin = materializeBIN(pb, false /*asDelta*/);
/*
* Latch the BIN to avoid assertions during BIN.writeToLog. A side
* effect is setting the Database, which is also needed.
*/
bin.latchNoUpdateLRU(parent.getDatabase());
final boolean logDelta;
if (hasExpiredSlot) {
final int origNSlots = bin.getNEntries();
/* Compress non-dirty slots before determining delta status. */
bin.compress(false /*compressDirtySlots*/, null);
logDelta = bin.shouldLogDelta();
/* Also compress dirty slots, if we will not log a delta. */
if (!logDelta) {
bin.compress(true /*compressDirtySlots*/, null);
}
/* If we compressed an expired slot, re-serialize the BIN. */
if (preserveBINInCache && origNSlots != bin.getNEntries()) {
final long newMemId = serializeBIN(bin, bin.isBINDelta());
if (newMemId == 0) {
/*
* TODO: Is invalid if compressed slot had off-heap LN.
* Should discard off-heap BIN and install 'bin' in main.
*/
return new INLogEntry<>(
pb.binBytes, pb.lastFullLsn, pb.lastDeltaLsn,
LogEntryType.LOG_BIN, parent);
}
freeMemory(memId);
setOwnerAndMemId(entry, parent, newMemId);
}
} else {
assert canMutateToDelta;
logDelta = true;
}
return logDelta ?
(new BINDeltaLogEntry(bin)) :
(new INLogEntry<>(bin));
}
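/**
* Header information about an off-heap BIN, obtained without
* materializing it.
*/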
public static class BINInfo {
public final boolean isBINDelta;
public final long fullBINLsn;
private BINInfo(final ParsedBIN pb) {
isBINDelta = (pb.flags & BIN_FLAG_DELTA) != 0;
fullBINLsn = pb.lastFullLsn;
}
}
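/**
* Returns whether the off-heap BIN identified by the given entry is a
* delta, along with its last full LSN, by reading only the block header.
*/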
public BINInfo getBINInfo(final EnvironmentImpl envImpl, final int entry) {
assert entry >= 0;
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
final long memId = getMemId(entry);
final byte[] buf = new byte[checksumSize + 1 + 8 + 8 + 4];
allocator.copy(memId, 0, buf, 0, buf.length);
final ParsedBIN pb = parseBINBytes(
envImpl, buf, true /*partialBuf*/, false /*parseLNIds*/);
return new BINInfo(pb);
}
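/**
* Returns the total off-heap bytes associated with the given IN: for a
* main-cache BIN, the sizes of its off-heap LN blocks; for a level-2 IN,
* the sizes of its off-heap BIN blocks plus the off-heap LNs of BINs
* that are not also resident in the main cache.
*/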
long getINSize(final IN in) {
if (in.isBIN()) {
final BIN bin = (BIN) in;
if (!bin.hasOffHeapLNs()) {
return 0;
}
long size = 0;
for (int i = 0; i < in.getNEntries(); i += 1) {
final long memId = bin.getOffHeapLNId(i);
if (memId == 0) {
continue;
}
size += allocator.totalSize(memId);
}
return size;
}
if (in.getNormalizedLevel() != 2) {
return 0;
}
if (!in.hasOffHeapBINIds()) {
return 0;
}
final EnvironmentImpl envImpl = in.getEnv();
long size = 0;
for (int i = 0; i < in.getNEntries(); i += 1) {
final int entry = in.getOffHeapBINId(i);
if (entry < 0) {
continue;
}
final long memId = getMemId(entry);
size += allocator.totalSize(memId);
if (in.getTarget(i) != null) {
/* Redundant BIN, do not count off-heap LNs here. */
continue;
}
final ParsedBIN pb = parseBINBytes(
envImpl, getMemBytes(memId),
false /*partialBuf*/, true /*parseLNIds*/);
if (pb.lnMemIds == null) {
continue;
}
for (final long lnMemId : pb.lnMemIds) {
if (lnMemId == 0) {
continue;
}
size += allocator.totalSize(lnMemId);
}
}
return size;
}
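/**
* Holds the fields parsed from a serialized off-heap BIN block by
* parseBINBytes.
*/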
private static class ParsedBIN {
final int flags;
final long[] lnMemIds;
final long lastFullLsn;
final long lastDeltaLsn;
final int minExpiration;
final ByteBuffer binBytes;
ParsedBIN(final int flags,
final long[] lnMemIds,
final long lastFullLsn,
final long lastDeltaLsn,
final int minExpiration,
final ByteBuffer binBytes) {
this.flags = flags;
this.lnMemIds = lnMemIds;
this.lastFullLsn = lastFullLsn;
this.lastDeltaLsn = lastDeltaLsn;
this.minExpiration = minExpiration;
this.binBytes = binBytes;
}
}
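/**
* Parses the header of a serialized BIN block and optionally its packed
* LN memIds. When checksums are enabled and the full block is supplied,
* a non-zero stored checksum is verified. With partialBuf=true only the
* header fields are returned and binBytes/lnMemIds are null.
*/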
private ParsedBIN parseBINBytes(final EnvironmentImpl envImpl,
final byte[] buf,
final boolean partialBuf,
final boolean parseLNIds) {
assert !(partialBuf && parseLNIds);
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
if (useChecksums && !partialBuf) {
final int storedChecksum = getInt(buf, 0);
if (storedChecksum != 0) {
final Checksum checksum = Adler32.makeChecksum();
checksum.update(buf, checksumSize, buf.length - checksumSize);
final int checksumValue = (int) checksum.getValue();
if (storedChecksum != checksumValue) {
throw unexpectedState(
envImpl,
"Off-heap cache checksum error. Expected " +
storedChecksum + " but got " + checksumValue);
}
}
}
int bufOffset = checksumSize;
final int flags = buf[bufOffset];
bufOffset += 1;
final long lastFullLsn = getLong(buf, bufOffset);
bufOffset += 8;
final long lastDeltaLsn = getLong(buf, bufOffset);
bufOffset += 8;
final int minExpiration = getInt(buf, bufOffset);
bufOffset += 4;
if (partialBuf) {
return new ParsedBIN(
flags, null, lastFullLsn, lastDeltaLsn, minExpiration, null);
}
final short lnIdsSize = getShort(buf, bufOffset);
bufOffset += 2;
/* lnIdsSize was negated if LNs were stripped by eviction. */
final long[] lnMemIds;
if (lnIdsSize > 0 && parseLNIds) {
lnMemIds = unpackLnMemIds(buf, bufOffset, lnIdsSize);
} else {
lnMemIds = null;
}
bufOffset += Math.abs(lnIdsSize);
final ByteBuffer byteBuf =
ByteBuffer.wrap(buf, bufOffset, buf.length - bufOffset);
return new ParsedBIN(
flags, lnMemIds, lastFullLsn, lastDeltaLsn, minExpiration,
byteBuf);
}
/**
* Returns the minimum expiration time in hours, or zero if no slots
* have an expiration time.
*/
private int getMinExpiration(final BIN bin) {
int minExpire = 0;
for (int i = 0; i < bin.getNEntries(); i += 1) {
int expire = bin.getExpiration(i);
if (expire == 0) {
continue;
}
if (minExpire > expire || minExpire == 0) {
minExpire = expire;
}
}
if (minExpire == 0) {
return 0;
}
return bin.isExpirationInHours() ? minExpire : (minExpire * 24);
}
/**
* Adds LN memIds to the buffer using an RLE approach to save space:
*
* - The memIds are packed in slot index order. All slots are represented.
* - A positive byte indicates the number of 8-byte memIds that follow.
* - A negative byte indicates the number of slots that have no memId.
* - When a run exceeds 127 slots, another run is added. So there is no
* effective limit on the number of slots, although we know the maximum will
* fit in a short integer.
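*
* For example, a 5-slot BIN with off-heap LNs in slots 0 and 3 only is
* packed as: [+1][memId0][-2][+1][memId3][-1].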
*/
private static void packLnMemIds(final BIN bin,
final byte[] buf,
int off) {
int nOff = off;
off += 1;
byte n = 0;
for (int i = 0; i < bin.getNEntries(); i += 1) {
final long memId = bin.getOffHeapLNId(i);
if (memId != 0) {
if (n < 0 || n == 127) {
buf[nOff] = n;
nOff = off;
off += 1;
n = 0;
}
putLong(memId, buf, off);
off += 8;
n += 1;
} else {
if (n > 0 || n == -127) {
buf[nOff] = n;
nOff = off;
off += 1;
n = 0;
}
n -= 1;
}
}
buf[nOff] = n;
}
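/**
* Returns the number of bytes that packLnMemIds will write for this BIN,
* or zero if it has no off-heap LNs.
*/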
private static short getPackedLnMemIdSize(final BIN bin) {
if (!bin.hasOffHeapLNs()) {
return 0;
}
int off = 1;
byte n = 0;
for (int i = 0; i < bin.getNEntries(); i += 1) {
if (bin.getOffHeapLNId(i) != 0) {
if (n < 0 || n == 127) {
off += 1;
n = 0;
}
off += 8;
n += 1;
} else {
if (n > 0 || n == -127) {
off += 1;
n = 0;
}
n -= 1;
}
}
if (off > Short.MAX_VALUE) {
throw unexpectedState();
}
return (short) off;
}
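/**
* Expands the run-length encoding written by packLnMemIds into an array
* indexed by slot, where zero means the slot has no off-heap LN. Uses
* one pass to size the array and a second pass to fill it.
*/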
private static long[] unpackLnMemIds(final byte[] buf,
final int startOff,
final int len) {
assert len > 0;
final int endOff = startOff + len;
int off = startOff;
int i = 0;
while (off < endOff) {
final int n = buf[off];
off += 1;
if (n > 0) {
off += n * 8;
i += n;
} else {
assert n < 0;
i -= n;
}
}
final long[] ids = new long[i + 1];
off = startOff;
i = 0;
while (off < endOff) {
int n = buf[off];
off += 1;
if (n > 0) {
while (n > 0) {
ids[i] = getLong(buf, off);
off += 8;
i += 1;
n -= 1;
}
} else {
assert n < 0;
i -= n;
}
}
return ids;
}
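/**
* Allocates an off-heap block of the given size, returning zero if
* allocation is not possible. On OutOfMemoryError or allocator overflow
* the memory limit is reduced so that eviction will shrink the cache,
* and the eviction threads are woken if eviction is needed.
*/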
private long allocateMemory(final EnvironmentImpl envImpl,
final int size) {
/*
* Only enable the off-heap cache after recovery. This ensures
* that off-heap memory is available to recovery as file system
* cache, which is important when performing multiple passes over
* the recovery interval.
*/
if (!envImpl.isValid()) {
return 0;
}
long memId = 0;
try {
memId = allocator.allocate(size);
totalBlocks.incrementAndGet();
if (DEBUG_DOUBLE_FREE) {
final Long key = memId;
freedBlocks.remove(key);
prevFreedBlocks.remove(key);
}
} catch (OutOfMemoryError e) {
LoggerUtils.envLogMsg(
Level.SEVERE, envImpl,
"OutOfMemoryError trying to allocate in the off-heap cache. " +
"Continuing, but more problems are likely. Allocator error: " +
e.getMessage());
nAllocFailure.incrementAndGet();
memoryLimit = allocator.getUsedBytes() - evictBytes;
} catch (OffHeapAllocator.OffHeapOverflowException e) {
nAllocOverflow.incrementAndGet();
memoryLimit = allocator.getUsedBytes();
}
if (needEviction()) {
wakeUpEvictionThreads();
}
return memId;
}
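/**
* Frees an off-heap block and returns the number of bytes freed. With
* DEBUG_DOUBLE_FREE enabled, records each free so that a double free can
* be reported with the stack trace of the earlier free.
*/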
private int freeMemory(final long memId) {
if (DEBUG_DOUBLE_FREE) {
final Long key = memId;
boolean added = false;
Exception e = null;
Map<Long, Exception> curr = freedBlocks;
Map<Long, Exception> prev = prevFreedBlocks;
if (freedBlocks.size() >= DEBUG_FREE_BLOCKS_PER_MAP) {
synchronized (this) {
if (freedBlocks.size() >= DEBUG_FREE_BLOCKS_PER_MAP) {
prevFreedBlocks = freedBlocks;
freedBlocks = new ConcurrentHashMap<>();
e = freedBlocks.put(
key, new Exception("Freed: " + memId));
added = true;
curr = freedBlocks;
prev = prevFreedBlocks;
}
}
}
if (!added) {
e = curr.put(key, new Exception("Freed: " + memId));
}
if (e != null) {
new Exception(
"Double-freed: " + memId + "\n" +
LoggerUtils.getStackTrace(e)).printStackTrace();
}
if (curr != prev) {
e = prev.get(key);
if (e != null) {
new Exception(
"Double-freed: " + memId + "\n" +
LoggerUtils.getStackTrace(e)).printStackTrace();
}
}
}
totalBlocks.decrementAndGet();
return allocator.free(memId);
}
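/*
* Helpers for copying off-heap blocks and reading/writing primitive
* values in blocks and byte arrays. Multi-byte values use big-endian
* byte order.
*/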
private byte[] getMemBytes(final long memId) {
final byte[] bytes = new byte[allocator.size(memId)];
allocator.copy(memId, 0, bytes, 0, bytes.length);
return bytes;
}
private byte getByte(final long memId,
final int offset,
final byte[] tempBuf) {
allocator.copy(memId, offset, tempBuf, 0, 1);
return tempBuf[0];
}
private void putShort(final short val,
final long memId,
final int offset,
final byte[] tempBuf) {
putShort(val, tempBuf, 0);
allocator.copy(tempBuf, 0, memId, offset, 2);
}
private short getShort(final long memId,
final int offset,
final byte[] tempBuf) {
allocator.copy(memId, offset, tempBuf, 0, 2);
return getShort(tempBuf, 0);
}
private void putInt(final int val,
final long memId,
final int offset,
final byte[] tempBuf) {
putInt(val, tempBuf, 0);
allocator.copy(tempBuf, 0, memId, offset, 4);
}
private int getInt(final long memId,
final int offset,
final byte[] tempBuf) {
allocator.copy(memId, offset, tempBuf, 0, 4);
return getInt(tempBuf, 0);
}
private void putLong(final long val,
final long memId,
final int offset,
final byte[] tempBuf) {
putLong(val, tempBuf, 0);
allocator.copy(tempBuf, 0, memId, offset, 8);
}
private long getLong(final long memId,
final int offset,
final byte[] tempBuf) {
allocator.copy(memId, offset, tempBuf, 0, 8);
return getLong(tempBuf, 0);
}
private static void putShort(final short val,
final byte[] buf,
final int offset) {
buf[offset] = (byte) (val >> 8);
buf[offset + 1] = (byte) val;
}
private static short getShort(final byte[] buf,
final int offset) {
return (short)
((buf[offset] << 8) |
(buf[offset + 1] & 0xff));
}
private static void putInt(final int val,
final byte[] buf,
final int offset) {
buf[offset] = (byte) (val >> 24);
buf[offset + 1] = (byte) (val >> 16);
buf[offset + 2] = (byte) (val >> 8);
buf[offset + 3] = (byte) val;
}
private static int getInt(final byte[] buf,
final int offset) {
return
((buf[offset] << 24) |
((buf[offset + 1] & 0xff) << 16) |
((buf[offset + 2] & 0xff) << 8) |
(buf[offset + 3] & 0xff));
}
private static void putLong(final long val,
final byte[] buf,
final int offset) {
buf[offset] = (byte) (val >> 56);
buf[offset + 1] = (byte) (val >> 48);
buf[offset + 2] = (byte) (val >> 40);
buf[offset + 3] = (byte) (val >> 32);
buf[offset + 4] = (byte) (val >> 24);
buf[offset + 5] = (byte) (val >> 16);
buf[offset + 6] = (byte) (val >> 8);
buf[offset + 7] = (byte) val;
}
private static long getLong(final byte[] buf,
final int offset) {
return
(((long)buf[offset] << 56) |
(((long)buf[offset + 1] & 0xff) << 48) |
(((long)buf[offset + 2] & 0xff) << 40) |
(((long)buf[offset + 3] & 0xff) << 32) |
(((long)buf[offset + 4] & 0xff) << 24) |
(((long)buf[offset + 5] & 0xff) << 16) |
(((long)buf[offset + 6] & 0xff) << 8) |
((long)buf[offset + 7] & 0xff));
}
public void doCriticalEviction(boolean backgroundIO) {
if (needEviction()) {
wakeUpEvictionThreads();
if (needCriticalEviction()) {
evictBatch(EvictionSource.CRITICAL, backgroundIO);
}
}
}
public void doDaemonEviction(boolean backgroundIO) {
if (needEviction()) {
wakeUpEvictionThreads();
if (needCriticalEviction()) {
evictBatch(EvictionSource.DAEMON, backgroundIO);
}
}
}
public void doManualEvict() {
if (!isEnabled()) {
return;
}
evictBatch(EvictionSource.MANUAL, true /*backgroundIO*/);
}
private void wakeUpEvictionThreads() {
if (!runEvictorThreads || !isEnabled()) {
return;
}
/*
* This check is meant to avoid the lock taken by
* ArrayBlockingQueue.offer() when this is futile. The lock reduces
* concurrency because this method is called so frequently.
*/
if (activePoolThreads.get() >= maxPoolThreads) {
return;
}
evictionPool.execute(new Runnable() {
@Override
public void run() {
activePoolThreads.incrementAndGet();
try {
evictBatch(
EvictionSource.EVICTORTHREAD, true /*backgroundIO*/);
} finally {
activePoolThreads.decrementAndGet();
}
}
});
}
private boolean needEviction() {
if (!isEnabled()) {
return false;
}
/*
* When off-heap cache size is set to zero after being non-zero, we
* perform eviction only until the cache becomes empty.
*/
if (maxMemory == 0) {
return allocator.getUsedBytes() > 0;
}
return allocator.getUsedBytes() + evictBytes >= memoryLimit;
}
private boolean needCriticalEviction() {
if (!isEnabled()) {
return false;
}
/*
* When off-heap cache size is set to zero after being non-zero, we
* perform only non-critical eviction.
*/
if (maxMemory == 0) {
return false;
}
return allocator.getUsedBytes() >= memoryLimit;
}
private int getLRUSize(final LRUList[] listSet) {
int size = 0;
for (final LRUList l : listSet) {
size += l.getSize();
}
return size;
}
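/**
* Evicts entries until enough bytes are freed, eviction is no longer
* needed, shutdown is requested, or both LRU sets have been traversed
* once. Entries are taken from the pri1 LRU set first, then from the
* pri2 set.
*/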
private void evictBatch(final EvictionSource source,
final boolean backgroundIO) {
final long maxBytesToEvict =
evictBytes + (allocator.getUsedBytes() - memoryLimit);
long bytesEvicted = 0;
boolean pri2 = false;
int maxLruEntries = getLRUSize(pri1LRUSet);
int nLruEntries = 0;
while (bytesEvicted < maxBytesToEvict &&
needEviction() &&
!shutdownRequested.get()) {
if (nLruEntries >= maxLruEntries) {
if (pri2) {
break;
}
pri2 = true;
maxLruEntries = getLRUSize(pri2LRUSet);
nLruEntries = 0;
}
final LRUList lru;
if (pri2) {
final int lruIdx =
Math.abs(nextPri2LRUList++) % numLRULists;
lru = pri2LRUSet[lruIdx];
} else {
final int lruIdx =
Math.abs(nextPri1LRUList++) % numLRULists;
lru = pri1LRUSet[lruIdx];
}
final int entry = lru.removeFront();
nLruEntries += 1;
if (entry < 0) {
continue;
}
bytesEvicted += evictOne(source, backgroundIO, entry, lru, pri2);
}
}
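/**
* Processes a single entry removed from an LRU list. Depending on its
* state, strips off-heap LNs, mutates a full BIN to a BIN-delta, moves a
* dirty BIN from the pri1 to the pri2 list, or logs (if dirty) and
* discards the BIN. Returns the number of bytes freed, or zero if the
* entry was skipped or only moved.
*/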
private long evictOne(final EvictionSource source,
final boolean backgroundIO,
final int entry,
final LRUList lru,
final boolean pri2) {
nNodesTargeted.incrementAndGet();
if (source == EvictionSource.CRITICAL) {
nCriticalNodesTargeted.incrementAndGet();
}
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
/*
* Note that almost anything could have happened in other threads
* after removing the entry from the LRU and prior to latching the
* owner IN. We account for these possibilities below.
*
* When we decide to "skip" an entry, it is not added back to the LRU
* and it is not freed. The assumption is that another thread is
* processing the entry and will add it to the LRU or free it.
*/
final IN in = chunk.owners[chunkIdx];
/*
* If the IN is null, skip the entry. The IN may have been evicted.
*/
if (in == null) {
nNodesSkipped.incrementAndGet();
return 0;
}
final EnvironmentImpl envImpl = in.getEnv();
in.latchNoUpdateLRU();
try {
/*
* If the owner has changed or the IN was evicted, skip it.
*/
if (in != chunk.owners[chunkIdx] ||
!in.getInListResident()) {
nNodesSkipped.incrementAndGet();
return 0;
}
/*
* If owner is a BIN, it is in the main cache but may have
* off-heap LNs.
*/
if (in.isBIN()) {
final BIN bin = (BIN) in;
/*
* If entry is no longer associated with this BIN, skip it.
*/
if (bin.getOffHeapLruId() != entry) {
nNodesSkipped.incrementAndGet();
return 0;
}
/*
* If the entry was added back to the LRU, skip it. This check
* requires synchronizing on the LRUList after latching the IN.
* We know we're checking the correct LRUList because an entry
* with a BIN owner can never be in the priority 2 LRU set.
*/
if (lru.contains(chunk, chunkIdx)) {
nNodesSkipped.incrementAndGet();
return 0;
}
return stripLNsFromMainBIN(bin, entry, pri2);
}
/*
* The owner has a child BIN that is off-heap.
*/
int index = -1;
for (int i = 0; i < in.getNEntries(); i += 1) {
if (in.getOffHeapBINId(i) == entry) {
index = i;
break;
}
}
/*
* If entry is no longer associated with this IN, skip it.
*/
if (index < 0) {
nNodesSkipped.incrementAndGet();
return 0;
}
/*
* If the entry was moved between a pri1 and pri2 LRU list, skip
* it. This means that the LRUList from which we removed the entry
* is not the list it belongs to.
*/
if (pri2 != in.isOffHeapBINPri2(index)) {
nNodesSkipped.incrementAndGet();
return 0;
}
/*
* If the entry was added back to the LRU, skip it. This check
* requires synchronizing on the LRUList after latching the IN, and
* it requires that we're using the correct LRU list (the check
* above).
*/
if (lru.contains(chunk, chunkIdx)) {
nNodesSkipped.incrementAndGet();
return 0;
}
/*
* The BIN should never be resident in main.
*/
final BIN residentBIN = (BIN) in.getTarget(index);
if (residentBIN != null) {
throw EnvironmentFailureException.unexpectedState(
envImpl, "BIN is resident in both caches, id=" +
residentBIN.getNodeId());
}
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
final long memId = chunk.memIds[chunkIdx];
final int flags = getByte(memId, checksumSize, new byte[1]);
final boolean dirty = in.isOffHeapBINDirty(index);
/*
* First try stripping LNs.
*/
final long nLNBytesEvicted = stripLNs(
entry, pri2, dirty, memId, chunk, chunkIdx, in, index,
backgroundIO);
if (nLNBytesEvicted > 0) {
return nLNBytesEvicted;
}
/*
* Next try mutating a full BIN to a BIN-delta.
*/
if ((flags & BIN_FLAG_CAN_MUTATE) != 0) {
final long nBytesEvicted = mutateToBINDelta(
envImpl, in.getDatabase(), entry, pri2,
chunk, chunkIdx);
if (nBytesEvicted > 0) {
return nBytesEvicted;
}
}
/*
* If it is in the pri1 list and is dirty with no resident LNs,
* move it to the pri2 list. We currently have no stat for this.
*/
if (!pri2 && dirty) {
moveBack(entry, true /*pri2*/);
in.setOffHeapBINId(
index, entry, true /*pri2*/, true /*dirty*/);
return 0;
}
/*
* Log the BIN if it is dirty and finally just get rid of it.
*/
return flushAndDiscardBIN(
entry, pri2, dirty, memId, in, index, backgroundIO,
false /*freeLNs*/);
} finally {
in.releaseLatch();
}
}
/**
* Strip off-heap LNs referenced by a main cache BIN. If there are any
* off-heap expired LNs, strip only them. Otherwise, strip all LNs.
*/
private long stripLNsFromMainBIN(final BIN bin,
final int entry,
final boolean pri2) {
/*
* Strip expired LNs first.
*/
int nEvicted = 0;
long nBytesEvicted = 0;
boolean anyNonExpired = false;
for (int i = 0; i < bin.getNEntries(); i += 1) {
if (bin.getOffHeapLNId(i) == 0) {
continue;
}
if (bin.isExpired(i)) {
nBytesEvicted += freeLN(bin, i);
nEvicted += 1;
} else {
anyNonExpired = true;
}
}
/*
* If any were expired, return. If any non-expired LNs remain, put back
* the entry on the LRU list, leaving the non-expired LNs resident.
* Also compress the BIN to free the expired slots in the main cache.
*/
if (nEvicted > 0) {
if (anyNonExpired) {
moveBack(entry, pri2);
} else {
bin.setOffHeapLruId(-1);
freeEntry(entry);
}
bin.getEnv().lazyCompress(bin);
nLNsEvicted.addAndGet(nEvicted);
nNodesStripped.incrementAndGet();
return nBytesEvicted;
}
/*
* No expired LNs are present. Strip the non-expired LNs.
*/
for (int i = 0; i < bin.getNEntries(); i += 1) {
final int lnBytes = freeLN(bin, i);
if (lnBytes == 0) {
continue;
}
nBytesEvicted += lnBytes;
nEvicted += 1;
}
if (nEvicted > 0) {
nLNsEvicted.addAndGet(nEvicted);
nNodesStripped.incrementAndGet();
} else {
nNodesSkipped.incrementAndGet();
}
/*
* No LNs are off-heap now, so remove the entry.
*/
bin.setOffHeapLruId(-1);
freeEntry(entry);
return nBytesEvicted;
}
public long stripLNs(final IN parent, final int index) {
assert parent.isLatchExclusiveOwner();
assert parent.getInListResident();
final int entry = parent.getOffHeapBINId(index);
assert entry >= 0;
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
final boolean pri2 = parent.isOffHeapBINPri2(index);
final boolean dirty = parent.isOffHeapBINDirty(index);
final long memId = chunk.memIds[chunkIdx];
return stripLNs(
entry, pri2, dirty, memId, chunk, chunkIdx, parent, index, false);
}
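/**
* Evicts the off-heap LNs referenced by an off-heap BIN. For a full BIN
* with possibly-expired slots, the BIN is materialized, expired LNs are
* freed, expired slots are compressed and the BIN is re-serialized.
* Otherwise the packed LN memIds are freed and either marked unused or
* reclaimed by copying the BIN to a smaller block. Returns the net bytes
* freed, or zero if there was nothing to strip.
*/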
private long stripLNs(final int entry,
final boolean pri2,
final boolean dirty,
final long memId,
final Chunk chunk,
final int chunkIdx,
final IN parent,
final int index,
final boolean backgroundIO) {
final EnvironmentImpl envImpl = parent.getEnv();
final boolean useChecksums = envImpl.useOffHeapChecksums();
final int checksumSize = useChecksums ? CHECKSUM_SIZE : 0;
/*
* Contents of headBuf:
* flags, fullLsn, deltaLsn, minExpiration, lnIdsSize.
* Note that headBuf does not contain the checksum.
* Contents of memId following headBuf fields: LN mem Ids, BIN.
*/
final byte[] headBuf = new byte[1 + 8 + 8 + 4 + 2];
allocator.copy(memId, checksumSize, headBuf, 0, headBuf.length);
final int memHeadLen = checksumSize + headBuf.length;
final byte flags = headBuf[0];
int bufOffset = 1 + 8 + 8;
final int minExpiration = getInt(headBuf, bufOffset);
bufOffset += 4;
final short lnIdsSize = getShort(headBuf, bufOffset);
bufOffset += 2;
assert bufOffset == headBuf.length;
int nEvicted = 0;
long nBytesEvicted = 0;
/*
* If this is a full BIN and any slot may be expired, then materialize
* the BIN, evict expired off-heap LNs, compress expired slots, and
* re-serialize the BIN.
*/
if ((flags & BIN_FLAG_DELTA) == 0 &&
envImpl.isExpired(minExpiration, true /*hours*/)) {
final BIN bin = materializeBIN(envImpl, getMemBytes(memId));
bin.latchNoUpdateLRU(parent.getDatabase());
try {
for (int i = 0; i < bin.getNEntries(); i += 1) {
if (bin.getOffHeapLNId(i) == 0 ||
!bin.isExpired(i)) {
continue;
}
nBytesEvicted += freeLN(bin, i);
nEvicted += 1;
}
/*
* TODO: Compression is expensive because we must re-serialize
* the BIN. It may be more efficient to only proceed to
* compression if no LNs were freed above, although we would
* first need to clear the LN memIds.
*/
final int origNSlots = bin.getNEntries();
bin.compress(
!bin.shouldLogDelta() /*compressDirtySlots*/, null);
/*
* If we compressed any expired slots, re-serialize the BIN.
* Also re-serialize in the rare case that an LN was freed but
* no slot was compressed due to record locks; if we did not do
* this, the invalid/freed LN memId would not be cleared.
*/
if (origNSlots != bin.getNEntries() || nEvicted > 0) {
final long newMemId = serializeBIN(bin, bin.isBINDelta());
if (newMemId == 0) {
/*
* When allocations are failing, freeing the BIN is the
* simplest and most productive thing to do.
*/
nBytesEvicted += flushAndDiscardBIN(
entry, pri2, dirty, memId, parent, index,
backgroundIO, true /*freeLNs*/);
return nBytesEvicted;
}
nBytesEvicted += freeMemory(memId);
nBytesEvicted -= allocator.totalSize(newMemId);
chunk.memIds[chunkIdx] = newMemId;
}
} finally {
bin.releaseLatch();
}
/* Return if we freed any memory by LN eviction or compression. */
if (nBytesEvicted > 0) {
nLNsEvicted.addAndGet(nEvicted);
nNodesStripped.incrementAndGet();
moveBack(entry, pri2);
return nBytesEvicted;
}
}
if (lnIdsSize <= 0) {
return 0;
}
final byte[] lnBuf = new byte[lnIdsSize];
allocator.copy(memId, memHeadLen, lnBuf, 0, lnBuf.length);
final long[] lnMemIds = unpackLnMemIds(lnBuf, 0, lnIdsSize);
for (final long lnMemId : lnMemIds) {
if (lnMemId == 0) {
continue;
}
nBytesEvicted += freeLN(lnMemId);
nEvicted += 1;
}
assert nEvicted > 0;
if (lnIdsSize <= MAX_UNUSED_BIN_BYTES) {
/*
* When there are only a small number of LN memIds, we can tolerate
* the wasted space in the BIN so we just negate the size.
*/
final byte[] tempBuf = new byte[8];
putShort((short) (-lnIdsSize), memId, memHeadLen - 2, tempBuf);
/* However, the checksum is now invalid. */
if (useChecksums) {
putInt(0, memId, 0, tempBuf);
}
} else {
/*
* When there are many LN memIds, we reclaim the space they use by
* copying the BIN to a smaller block and freeing the old block.
*/
final int newSize = allocator.size(memId) - lnIdsSize;
final long newMemId = allocateMemory(envImpl, newSize);
if (newMemId == 0) {
/*
* When allocations are failing, freeing the BIN is the
* simplest and most productive thing to do.
*/
nBytesEvicted += flushAndDiscardBIN(
entry, pri2, dirty, memId, parent, index, backgroundIO,
true /*freeLNs*/);
return nBytesEvicted;
}
nBytesEvicted -= allocator.totalSize(newMemId);
/*
* Copy all parts of the old BIN to the new, except for the
* checksum, lnIdsSize and the LN memIds. We don't need to set
* the checksum or lnIdsSize to zero in the new block because it
* was zero-filled when it was allocated. Instead we omit these
* fields when copying.
*
* The first copy includes all headBuf fields except for the
* lnIdsSize at the end of the buffer. The second copy includes
* the serialized BIN alone.
*/
allocator.copy(
headBuf, 0,
newMemId, checksumSize, headBuf.length - 2);
allocator.copy(
memId, memHeadLen + lnIdsSize,
newMemId, memHeadLen, newSize - memHeadLen);
nBytesEvicted += freeMemory(memId);
chunk.memIds[chunkIdx] = newMemId;
}
nLNsEvicted.addAndGet(nEvicted);
nNodesStripped.incrementAndGet();
moveBack(entry, pri2);
return nBytesEvicted;
}
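/**
* Mutates the off-heap BIN child at the given index of a latched parent
* to a BIN-delta. Returns the net bytes freed, or zero if the child is
* not off-heap or the delta could not be serialized off-heap.
*/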
public long mutateToBINDelta(final IN parent,
final int index) {
assert parent.isLatchExclusiveOwner();
assert parent.getInListResident();
final int entry = parent.getOffHeapBINId(index);
if (entry < 0) {
return 0;
}
final Chunk chunk = chunks[entry / CHUNK_SIZE];
final int chunkIdx = entry % CHUNK_SIZE;
return mutateToBINDelta(
parent.getEnv(), parent.getDatabase(), entry,
parent.isOffHeapBINPri2(index), chunk, chunkIdx);
}
private long mutateToBINDelta(final EnvironmentImpl envImpl,
final DatabaseImpl dbImpl,
final int entry,
final boolean pri2,
final Chunk chunk,
final int chunkIdx) {
final long memId = chunk.memIds[chunkIdx];
final BIN bin = materializeBIN(envImpl, getMemBytes(memId));
assert bin.getLastFullLsn() != DbLsn.NULL_LSN;
final long newMemId;
bin.latchNoUpdateLRU(dbImpl);
try {
newMemId = serializeBIN(bin, true /*asDelta*/);
} finally {
bin.releaseLatch();
}
if (newMemId == 0) {
return 0;
}
long nBytesEvicted = freeBIN(envImpl, memId, false /*freeLNs*/);
nBytesEvicted -= allocator.totalSize(newMemId);
chunk.memIds[chunkIdx] = newMemId;
nNodesMutated.incrementAndGet();
moveBack(entry, pri2);
return nBytesEvicted;
}
/**
* Logs the BIN child if it is dirty, and then discards it.
*
* @return bytes freed, or zero if a dirty BIN could not be logged due to
* a read-only env or disk limit violation.
*/
private long flushAndDiscardBIN(final int entry,
final boolean pri2,
final boolean dirty,
final long memId,
final IN parent,
final int index,
final boolean backgroundIO,
final boolean freeLNs) {
assert parent.isLatchExclusiveOwner();
final EnvironmentImpl envImpl = parent.getEnv();
if (DEBUG_TRACE) {
debug(
envImpl,
"Discard BIN LSN=" +
DbLsn.getNoFormatString(parent.getLsn(index)) +
" pri2=" + pri2 + " dirty=" + dirty);
}
if (dirty) {
/*
* Cannot evict dirty nodes in a read-only environment, or when a
* disk limit has been exceeded. We can assume that the cache will
* not overflow with dirty nodes because writes are prohibited.
*/
if (envImpl.isReadOnly() ||
envImpl.getDiskLimitViolation() != null) {
nNodesSkipped.incrementAndGet();
return 0;
}
final INLogEntry<BIN> logEntry = createBINLogEntry(
memId, entry, parent, false /*preserveBINInCache*/);
final Provisional provisional =
envImpl.coordinateWithCheckpoint(
parent.getDatabase(), IN.BIN_LEVEL, parent);
final long lsn = IN.logEntry(
logEntry, provisional, backgroundIO, parent);
parent.updateEntry(
index, lsn, VLSN.NULL_VLSN_SEQUENCE, 0 /*lastLoggedSize*/);
nDirtyNodesEvicted.incrementAndGet();
if (!logEntry.isPreSerialized()) {
logEntry.getMainItem().releaseLatch();
}
}
nNodesEvicted.incrementAndGet();
parent.clearOffHeapBINId(index);
remove(entry, pri2);
return freeBIN(envImpl, memId, freeLNs);
}
long testEvictMainBIN(final BIN bin) {
final int entry = bin.getOffHeapLruId();
assert entry >= 0;
final LRUList lru = pri1LRUSet[entry % numLRULists];
lru.remove(entry);
return evictOne(
EvictionSource.MANUAL, false /*backgroundIO*/, entry, lru,
false /*pri2*/);
}
long testEvictOffHeapBIN(final IN in, final int index) {
final int entry = in.getOffHeapBINId(index);
assert entry >= 0;
final boolean pri2 = in.isOffHeapBINPri2(index);
final int lruIdx = entry % numLRULists;
final LRUList lru =
pri2 ? pri2LRUSet[lruIdx] : pri1LRUSet[lruIdx];
lru.remove(entry);
return evictOne(
EvictionSource.MANUAL, false /*backgroundIO*/, entry, lru, pri2);
}
}