/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.tree;
import static com.sleepycat.je.EnvironmentFailureException.unexpectedState;
import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.EXTINCT;
import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.NOT_EXTINCT;
import java.io.FileNotFoundException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.cleaner.PackedObsoleteInfo;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentFailureReason;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.dbi.MemoryBudget;
import com.sleepycat.je.dbi.TTL;
import com.sleepycat.je.evictor.Evictor;
import com.sleepycat.je.evictor.OffHeapCache;
import com.sleepycat.je.latch.LatchContext;
import com.sleepycat.je.latch.LatchFactory;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.LatchTable;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.ErasedException;
import com.sleepycat.je.log.LogEntryType;
import com.sleepycat.je.log.LogItem;
import com.sleepycat.je.log.LogParams;
import com.sleepycat.je.log.LogUtils;
import com.sleepycat.je.log.Loggable;
import com.sleepycat.je.log.Provisional;
import com.sleepycat.je.log.ReplicationContext;
import com.sleepycat.je.log.WholeEntry;
import com.sleepycat.je.log.entry.BINDeltaLogEntry;
import com.sleepycat.je.log.entry.INLogEntry;
import com.sleepycat.je.log.entry.LNLogEntry;
import com.sleepycat.je.log.entry.LogEntry;
import com.sleepycat.je.tree.dupConvert.DBIN;
import com.sleepycat.je.tree.dupConvert.DIN;
import com.sleepycat.je.tree.dupConvert.DupConvert;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.SizeofMarker;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;
/**
* An IN represents an Internal Node in the JE tree.
*
* Explanation of KD (KnownDeleted) and PD (PendingDelete) entry flags
* ===================================================================
*
* PD: set for all LN entries that are deleted, even before the deleting
* txn is committed. It is used as an authoritative (transactionally correct)
* indication that an LN is deleted. PD will be cleared if the txn for the
* deleted LN is aborted.
*
* KD: set under special conditions for entries containing LNs which are known
* to be obsolete. Not used for entries in an active/uncommitted transaction.
*
* First notice that IN.fetchLN will allow a FileNotFoundException when the
* PD or KD flag is set on the entry. And it will allow a NULL_LSN when the KD
* flag is set.
*
* KD was implemented first, and was originally used when the cleaner attempts
* to migrate an LN and discovers it is deleted (see Cleaner.migrateLN). We
* need KD because the INCompressor may not have run, and may not have
* compressed the BIN. There's a danger that we'll try to fetch that entry
* after its log file has been deleted by the cleaner.
*
* More recently, KD was also used when an unexpected exception occurs while
* logging
* an LN, after inserting the entry. Rather than delete the entry to clean up,
* we mark the entry KD so it won't cause a fetch error later. In this case
* the entry LSN is NULL_LSN. See Tree.insertNewSlot.
*
* PD is closely related to the first use of KD above (for cleaned deleted LNs)
* and came about because of a cleaner optimization we make. The cleaner
* considers all deleted LN log entries to be obsolete, without doing a tree
* lookup, and without any record of an obsolete offset. This makes the cost
* of cleaning deleted LNs very low. For example, if the log looks like
* this:
*
* 100 LNA
* 200 delete of LNA
*
* then LSN 200 will be considered obsolete when this file is processed by the
* cleaner. After all, only two things can happen: (1) the txn commits, and we
* don't need LSN 200, because we can wipe this LN out of the tree, or (2) the
* txn aborts, and we don't need LSN 200, because we are going to revert to LSN
* 100/LNA.
*
* We set PD for the entry of a deleted LN at the time of the operation, and we
* clear PD if the transaction aborts. There is no real danger that this log
* entry will be processed by the cleaner before it's committed, because
* cleaning can only happen after the first active LSN.
*
* Just as in the first use of KD above, setting PD is necessary to avoid a
* fetch error, when the file is deleted by the cleaner but the entry
* containing the deleted LN has not been deleted by the INCompressor.
*
* PD is also set in replication rollback, when LNs are marked as
* invisible.
*
* When LSN locking was implemented (see CursorImpl.lockLN), the PD flag took
* on additional meaning. PD is used to determine whether an LN is deleted
* without fetching it, and therefore is relied on to be transactionally
* correct.
*
* In addition to the setting and use of the KD/PD flags described above, the
* situation is complicated by the fact that we must restore the state of
* these flags during abort and recovery, and set them properly during slot
* reuse.
*
* We have been meaning to consider whether PD and KD can be consolidated into
* one flag: simply the Deleted flag. The Deleted flag would be set in the
* same way as PD is currently set, as well as the second use of KD described
* above (when the LSN is NULL_LSN after an insertion error). The use of KD
* and PD for invisible entries and recovery rollback should also be
* considered.
*
* If we consolidate the two flags and set the Deleted flag during a delete
* operation (like PD), we'll have to remove optimizations (in CursorImpl for
* example) that consider a slot deleted when KD is set. Since KD is rarely
* set currently, this shouldn't have a noticeable performance impact.
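*
* As a hedged illustration (bin and idx are invented local names, not
* part of the original notes), a reader that wants to treat a slot as
* deleted under either flag can combine the two accessors defined below
* on a latched BIN:
*
*   // KD: known obsolete; PD: transactionally deleted. Either one
*   // means "skip this slot" for readers that need not distinguish.
*   boolean slotDeleted =
*       bin.isEntryKnownDeleted(idx) || bin.isEntryPendingDeleted(idx);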
*/
public class IN extends Node implements Comparable<IN>, LatchContext {
private static final String BEGIN_TAG = "<in>";
private static final String END_TAG = "</in>";
private static final String TRACE_SPLIT = "Split:";
private static final String TRACE_DELETE = "Delete:";
private static final int BYTES_PER_LSN_ENTRY = 4;
public static final int MAX_FILE_OFFSET = 0xfffffe;
private static final int THREE_BYTE_NEGATIVE_ONE = 0xffffff;
/**
* Used as the "empty rep" for the INLongRep offHeapBINIds field.
*
* minLength is 3 because BIN IDs are LRU list indexes. Initially 100k
* indexes are allocated and the largest values are used first.
*
* allowSparseRep is true because some workloads will only load BIN IDs for
* a subset of the BINs in the IN.
*/
private static final INLongRep.EmptyRep EMPTY_OFFHEAP_BIN_IDS =
new INLongRep.EmptyRep(3, true);
/*
* Levels:
* The mapping tree has levels in the 0x20000 -> 0x2ffff number space.
* The main tree has levels in the 0x10000 -> 0x1ffff number space.
* The duplicate tree levels are in the 0 -> 0xffff number space.
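*
* For example, BIN_LEVEL below is MAIN_LEVEL | 1 == 0x10001, and
* getNormalizedLevel() recovers the within-tree level by masking:
* 0x10001 & LEVEL_MASK == 1.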
*/
public static final int DBMAP_LEVEL = 0x20000;
public static final int MAIN_LEVEL = 0x10000;
public static final int LEVEL_MASK = 0x0ffff;
public static final int MIN_LEVEL = -1;
public static final int BIN_LEVEL = MAIN_LEVEL | 1;
/* Used to indicate that an exact match was found in findEntry. */
public static final int EXACT_MATCH = (1 << 16);
/* Used to indicate that an insert was successful. */
public static final int INSERT_SUCCESS = (1 << 17);
/*
* A bit flag set in the return value of partialEviction() to indicate
* whether the IN is evictable or not.
*/
public static final long NON_EVICTABLE_IN = (1L << 62);
/*
* Boolean properties of an IN, encoded as bits inside the flags
* data member.
*/
private static final int IN_DIRTY_BIT = 0x1;
private static final int IN_RECALC_TOGGLE_BIT = 0x2;
private static final int IN_IS_ROOT_BIT = 0x4;
private static final int IN_HAS_CACHED_CHILDREN_BIT = 0x8;
private static final int IN_PRI2_LRU_BIT = 0x10;
private static final int IN_DELTA_BIT = 0x20;
private static final int IN_FETCHED_COLD_BIT = 0x40;
private static final int IN_FETCHED_COLD_OFFHEAP_BIT = 0x80;
private static final int IN_RESIDENT_BIT = 0x100;
private static final int IN_PROHIBIT_NEXT_DELTA_BIT = 0x200;
private static final int IN_EXPIRATION_IN_HOURS = 0x400;
/* Tracing for LRU-related ops */
private static final boolean traceLRU = false;
private static final boolean traceDeltas = false;
private static final Level traceLevel = Level.INFO;
DatabaseImpl databaseImpl;
private int level;
/* The unique id of this node. */
long nodeId;
/* Some bits are persistent and some are not, see serialize. */
int flags;
/*
* The identifier key is a key that can be used to search for this IN.
* Initially it is the key of the zeroth slot, but insertions prior to slot
* zero make this no longer true. It is always equal to some key in the
* IN, and therefore it is changed by BIN.compress when removing slots.
*/
private byte[] identifierKey;
int nEntries;
byte[] entryStates;
/*
* entryKeys contains the keys in their entirety if key prefixing is not
* being used. If prefixing is enabled, then keyPrefix contains the prefix
* and entryKeys contains the suffixes. Records with small enough data
* (smaller than the value of the je.tree.maxEmbeddedLN param) are stored in
* their entirety (both key (or key suffix) and data) inside BINs. This is
* done by combining the record key and data as a two-part key (see the
* dbi/DupKeyData class) and storing the resulting array in entryKeys.
* A special case is when the record to be embedded has no data. Then,
* the two-part key format is not used, but instead the NO_DATA_LN_BIT
* is turned on in the slot's state. This saves the space overhead of
* using the two-part key format.
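*
* A hedged example of the prefix/suffix split (keys invented for
* illustration): if a BIN holds the keys "apple" and "apply" and
* prefixing is enabled, keyPrefix may be "appl" and entryKeys then
* stores only the suffixes "e" and "y"; getKey(idx) reconstructs the
* full key by concatenating prefix and suffix.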
*/
INKeyRep entryKeys;
byte[] keyPrefix;
/*
* The following entryLsnXXX fields are used for storing LSNs. There are
* two possible representations: a byte array based rep, and a long array
* based one. For compactness, the byte array rep is used initially. A
* single byte[] that uses four bytes per LSN is used. The baseFileNumber
* field contains the lowest file number of any LSN in the array. Then for
* each entry (four bytes each), the first byte contains the offset from
* the baseFileNumber of that LSN's file number. The remaining three bytes
* contain the file offset portion of the LSN. Three bytes will hold a
* maximum offset of 16,777,214 (0xfffffe), so with the default JE log file
* size of 10,000,000 bytes this works well.
*
* If either (1) the difference in file numbers exceeds 127
* (Byte.MAX_VALUE) or (2) the file offset is greater than 16,777,214, then
* the byte[] based rep mutates to a long[] based rep.
*
* In the byte[] rep, DbLsn.NULL_LSN is represented by setting the file
* offset bytes for a given entry to -1 (0xffffff).
*
* Note: A compact representation will be changed to the non-compact one,
* if needed, but in the current implementation, the reverse mutation
* (from long to compact) never takes place.
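*
* A worked example (values invented for illustration): with
* baseFileNumber == 100, the LSN for file 103, offset 0x001234 packs
* into four bytes as the file number offset 3 followed by the offset
* bytes 0x34, 0x12, 0x00 (low byte first; see put3ByteInt). An LSN in
* file 300 (difference 200 > Byte.MAX_VALUE) or with offset 0x1000000
* (> MAX_FILE_OFFSET) forces mutation to the long[] rep.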
*/
long baseFileNumber;
byte[] entryLsnByteArray;
long[] entryLsnLongArray;
public static boolean disableCompactLsns; // DbCacheSize only
/*
* The children of this IN. Only the ones that are actually in the cache
* have non-null entries. Specialized sparse-array representations are used
* to represent the entries. The representation can mutate as modifications
* are made to it.
*/
INTargetRep entryTargets;
/*
* In a level 2 IN, the LRU IDs of the child BINs.
*/
private INLongRep offHeapBINIds = EMPTY_OFFHEAP_BIN_IDS;
long inMemorySize;
/*
* Accumulated memory budget delta. Once this exceeds
* MemoryBudget.ACCUMULATED_LIMIT we inform the MemoryBudget that a change
* has occurred. See SR 12273.
*/
private int accumulatedDelta = 0;
/*
* Max allowable accumulation of memory budget changes before MemoryBudget
* should be updated. This allows for consolidating multiple calls to
* updateXXXMemoryBudget() into one call. Not declared final so that the
* unit tests can modify it. See SR 12273.
*/
public static final int ACCUMULATED_LIMIT_DEFAULT = 1000;
public static int ACCUMULATED_LIMIT = ACCUMULATED_LIMIT_DEFAULT;
/**
* References to the next and previous nodes in an LRU list. If the node
* is not in any LRUList, both of these will be null. If the node is at
* the front/back of an LRUList, prevLRUNode/nextLRUNode will point to
* the node itself.
*/
private IN nextLRUNode = null;
private IN prevLRUNode = null;
/*
* Let L be the most recently written logrec for this IN instance.
* (a) If this is a UIN, lastFullVersion is the lsn of L.
* (b) If this is a BIN instance and L is a full-version logrec,
* lastFullVersion is the lsn of L.
* (c) If this is a BIN instance and L is a delta logrec, lastFullVersion
* is the lsn of the most recently written full-version logrec for the
* same BIN.
*
* It is set in 2 cases:
*
* (a) after "this" is created via reading a logrec L, lastFullVersion is
* set to L's lsn, if L is a UIN or a full BIN. (this is done in
* IN.postFetch/RecoveryInit(), via IN.setLastLoggedLsn()). If L is a BIN
* delta, lastFullVersion is set by BINDeltaLogEntry.readEntry() to
* L.prevFullLsn.
*
* (b) After logging a UIN or a full-BIN logrec, it is set to the LSN of
* the logrec written. This is done in IN.afterLog().
*
* Notice that this is a persistent field, but except for case (c), when
* reading a logrec L, it is set not to the value found in L, but to the
* lsn of L. This is why its read/write is managed by the INLogEntry class
* rather than the IN readFromLog/writeFromLog methods.
*/
long lastFullVersion = DbLsn.NULL_LSN;
/*
* BINs have a lastDeltaVersion data field as well, which is defined as
* follows:
*
* Let L be the most recently written logrec for this BIN instance. If
* L is a full-version logrec, lastDeltaVersion is NULL; otherwise it
* is the lsn of L.
*
* It is used for obsolete tracking.
*
* It is set in 2 cases:
*
* (a) after "this" is created via reading a logrec L, lastDeltaVersion
* is set to L's lsn, if L is a BIN-delta logrec, or to NULL if L is a
* full-BIN logrec (this is done in IN.postFetch/RecoveryInit(), via
* BIN.setLastLoggedLsn()).
*
* (b) After we write a logrec L for this BIN instance, lastDeltaVersion
* is set to NULL if L is a full-BIN logrec, or to L's lsn, if L is a
* BIN-delta logrec (this is done in BIN.afterLog()).
*
* Notice that this is a persistent field, but when reading a logrec L,
* it is set not to the value found in L, but to the lsn of L. This is why
* its read/write is managed by the INLogEntry class rather than the IN
* readFromLog/writeFromLog methods.
*
* private long lastDeltaVersion = DbLsn.NULL_LSN;
*/
/*
* A sequence of obsolete info that cannot be counted as obsolete until an
* ancestor IN is logged non-provisionally.
*/
private PackedObsoleteInfo provisionalObsolete;
/* See convertDupKeys. */
private boolean needDupKeyConversion;
private int pinCount = 0;
private SharedLatch latch;
private IN parent;
private TestHook fetchINHook;
/**
* Create an empty IN, with no node ID, to be filled in from the log.
*/
public IN() {
init(null, Key.EMPTY_KEY, 0, 0);
}
/**
* Create a new IN.
*/
public IN(
DatabaseImpl dbImpl,
byte[] identifierKey,
int capacity,
int level) {
nodeId = dbImpl.getEnv().getNodeSequence().getNextLocalNodeId();
init(dbImpl, identifierKey, capacity,
generateLevel(dbImpl.getId(), level));
initMemorySize();
}
/**
* For Sizeof.
*/
public IN(@SuppressWarnings("unused") SizeofMarker marker) {
/*
* Set all variable fields to null, since they are not part of the
* fixed overhead.
*/
entryTargets = null;
entryKeys = null;
keyPrefix = null;
entryLsnByteArray = null;
entryLsnLongArray = null;
entryStates = null;
latch = LatchFactory.createSharedLatch(
LatchSupport.DUMMY_LATCH_CONTEXT, isAlwaysLatchedExclusively());
/*
* Use the latch to force it to grow to "runtime size".
*/
latch.acquireExclusive();
latch.release();
latch.acquireExclusive();
latch.release();
}
/**
* Create a new IN. Need this because we can't call newInstance() without
* getting a 0 for nodeId.
*/
IN createNewInstance(
byte[] identifierKey,
int maxEntries,
int level) {
return new IN(databaseImpl, identifierKey, maxEntries, level);
}
/**
* Initialize IN object.
*/
protected void init(
DatabaseImpl db,
@SuppressWarnings("hiding")
byte[] identifierKey,
int initialCapacity,
@SuppressWarnings("hiding")
int level) {
setDatabase(db);
latch = LatchFactory.createSharedLatch(
this, isAlwaysLatchedExclusively());
flags = 0;
nEntries = 0;
this.identifierKey = identifierKey;
entryTargets = INTargetRep.NONE;
entryKeys = new INKeyRep.Default(initialCapacity);
keyPrefix = null;
baseFileNumber = -1;
/*
* Normally we start out with the compact LSN rep and then mutate to
* the long rep when needed. But for some purposes (DbCacheSize) we
* start out with the long rep and never use the compact rep.
*/
if (disableCompactLsns) {
entryLsnByteArray = null;
entryLsnLongArray = new long[initialCapacity];
} else {
entryLsnByteArray = new byte[initialCapacity << 2];
entryLsnLongArray = null;
}
entryStates = new byte[initialCapacity];
this.level = level;
}
@Override
public final boolean isIN() {
return true;
}
@Override
public final boolean isUpperIN() {
return !isBIN();
}
@Override
public final String getLatchName() {
return shortClassName() + getNodeId();
}
@Override
public final int getLatchTimeoutMs() {
return databaseImpl.getEnv().getLatchTimeoutMs();
}
@Override
public final LatchTable getLatchTable() {
return LatchSupport.btreeLatchTable;
}
/*
* Return whether the shared latch for this kind of node should be of the
* "always exclusive" variety. Presently, only IN's are actually latched
* shared. BINs are latched exclusive only.
*/
boolean isAlwaysLatchedExclusively() {
return false;
}
/**
* Latch this node if it is not latched by another thread. Update the LRU
* using the given cacheMode if the latch succeeds.
*/
public final boolean latchNoWait(CacheMode cacheMode) {
if (!latch.acquireExclusiveNoWait()) {
return false;
}
updateLRU(cacheMode);
return true;
}
/**
* Latch this node exclusive and update the LRU using the given cacheMode.
*/
public void latch(CacheMode cacheMode) {
latch.acquireExclusive();
updateLRU(cacheMode);
}
/**
* Latch this node exclusive and update the LRU using the default cacheMode.
*/
public final void latch() {
latch(CacheMode.DEFAULT);
}
/**
* Latch this node shared and update the LRU using the given cacheMode.
*/
@Override
public void latchShared(CacheMode cacheMode) {
latch.acquireShared();
updateLRU(cacheMode);
}
/**
* Latch this node shared and update the LRU using the default cacheMode.
*/
@Override
public final void latchShared() {
latchShared(CacheMode.DEFAULT);
}
/**
* Latch this node exclusive and do not update the LRU or cause other
* related side effects.
*
* @param db is passed in order to initialize the database for an
* uninitialized node, which is necessary in order to latch it.
*/
public final void latchNoUpdateLRU(DatabaseImpl db) {
if (databaseImpl == null) {
databaseImpl = db;
}
latch.acquireExclusive();
}
/**
* Latch this node exclusive and do not update the LRU or cause other
* related side effects.
*/
public final void latchNoUpdateLRU() {
assert databaseImpl != null;
latch.acquireExclusive();
}
/**
* Release the latch on this node.
*/
@Override
public final void releaseLatch() {
latch.release();
}
/**
* Release the latch on this node if it is owned.
*/
public final void releaseLatchIfOwner() {
latch.releaseIfOwner();
}
/**
* @return true if this thread holds the IN's latch
*/
public final boolean isLatchOwner() {
return latch.isOwner();
}
public final boolean isLatchExclusiveOwner() {
return latch.isExclusiveOwner();
}
/* For unit testing. */
public final int getLatchNWaiters() {
return latch.getNWaiters();
}
public final void updateLRU(CacheMode cacheMode) {
if (!getInListResident()) {
return;
}
switch (cacheMode) {
case UNCHANGED:
case MAKE_COLD:
break;
case DEFAULT:
case EVICT_LN:
case EVICT_BIN:
case KEEP_HOT:
setFetchedCold(false);
setFetchedColdOffHeap(false);
if (isBIN() || !hasCachedChildrenFlag()) {
assert(isBIN() || !hasCachedChildren());
getEvictor().moveBack(this);
}
break;
default:
assert false;
}
}
/**
* This method should be used carefully. Unless this node and the parent
* are already known to be latched, call latchParent instead to access the
* parent safely.
*/
public IN getParent() {
return parent;
}
public void setParent(IN in) {
assert in != null;
/*
* Must hold EX-latch when changing a non-null parent. But when setting
* the parent initially (it is currently null), we assume it is being
* attached and no other threads have access to it.
*/
if (parent != null && !isLatchExclusiveOwner()) {
throw unexpectedState();
}
parent = in;
}
/**
* Latches the parent exclusively, leaving this node latched. The parent
* must not already be latched.
*
* This node must be latched on entry and will be latched on exit. This
* node's latch may be released temporarily, in which case it will be
* ex-latched (since the parent is ex-latched, this isn't a drawback).
*
* Does not perform cache mode processing, since this node is already
* latched.
*
* @return the ex-latched parent, for which calling getKnownChildIndex with
* this node is guaranteed to succeed.
*
* @throws EnvironmentFailureException (fatal) if the parent latch is
* already held.
*/
public final IN latchParent() {
assert latch.isOwner();
assert !isRoot();
assert getParent() != null;
while (true) {
final IN p = getParent();
if (p.latch.acquireExclusiveNoWait()) {
return p;
}
pin();
try {
latch.release();
p.latch.acquireExclusive();
latch.acquireExclusive();
} finally {
unpin();
}
if (getParent() == p) {
return p;
}
p.latch.release();
}
}
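/*
* Hedged usage sketch (the local names "child", "parent" and "idx" are
* invented): with "child" latched, latchParent() climbs to the parent
* without risking deadlock; on return both nodes are latched and
* getKnownChildIndex is guaranteed to succeed.
*
*     final IN parent = child.latchParent();
*     try {
*         final int idx = parent.getKnownChildIndex(child);
*         // ... operate on the parent slot ...
*     } finally {
*         parent.releaseLatch();
*         child.releaseLatch();
*     }
*/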
/**
* Returns the index of the given child. Should only be called when the
* caller knows that the given child is resident.
*/
public int getKnownChildIndex(final Node child) {
for (int i = 0; i < nEntries; i += 1) {
if (getTarget(i) == child) {
return i;
}
}
throw unexpectedState();
}
public final synchronized void pin() {
assert(isLatchOwner());
assert(pinCount >= 0);
++pinCount;
}
public final synchronized void unpin() {
assert(pinCount > 0);
--pinCount;
}
public final synchronized boolean isPinned() {
assert(isLatchExclusiveOwner());
assert(pinCount >= 0);
return pinCount > 0;
}
/**
* Get the database for this IN.
*/
public final DatabaseImpl getDatabase() {
return databaseImpl;
}
/**
* Set the database reference for this node.
*/
public final void setDatabase(DatabaseImpl db) {
databaseImpl = db;
}
/*
* Get the database id for this node.
*/
public final DatabaseId getDatabaseId() {
return databaseImpl.getId();
}
@Override
public final EnvironmentImpl getEnvImplForFatalException() {
return databaseImpl.getEnv();
}
public final EnvironmentImpl getEnv() {
return databaseImpl.getEnv();
}
final Evictor getEvictor() {
return databaseImpl.getEnv().getEvictor();
}
final OffHeapCache getOffHeapCache() {
return databaseImpl.getEnv().getOffHeapCache();
}
/**
* Convenience method to return the database key comparator.
*/
public final Comparator<byte[]> getKeyComparator() {
return databaseImpl.getKeyComparator();
}
@Override
public final int getLevel() {
return level;
}
public final int getNormalizedLevel() {
return level & LEVEL_MASK;
}
private static int generateLevel(DatabaseId dbId, int newLevel) {
if (dbId.equals(DbTree.ID_DB_ID)) {
return newLevel | DBMAP_LEVEL;
} else {
return newLevel | MAIN_LEVEL;
}
}
public final long getNodeId() {
return nodeId;
}
/* For unit tests only. */
final void setNodeId(long nid) {
nodeId = nid;
}
/**
* We would like as even a hash distribution as possible so that the
* Evictor's LRU is as accurate as possible. ConcurrentHashMap takes the
* value returned by this method and runs its own hash algorithm on it.
* So a bit complement of the node ID is sufficient as the return value and
* is a little better than returning just the node ID. If we use a
* different container in the future that does not re-hash the return
* value, we should probably implement the Wang-Jenkins hash function here.
*/
@Override
public final int hashCode() {
return (int) ~getNodeId();
}
@Override
public final boolean equals(Object obj) {
if (!(obj instanceof IN)) {
return false;
}
IN in = (IN) obj;
return (this.getNodeId() == in.getNodeId());
}
/**
* Sort based on node ID.
*/
public final int compareTo(IN argIN) {
long argNodeId = argIN.getNodeId();
long myNodeId = getNodeId();
if (myNodeId < argNodeId) {
return -1;
} else if (myNodeId > argNodeId) {
return 1;
} else {
return 0;
}
}
public final boolean getDirty() {
return (flags & IN_DIRTY_BIT) != 0;
}
public final void setDirty(boolean dirty) {
if (dirty) {
flags |= IN_DIRTY_BIT;
} else {
flags &= ~IN_DIRTY_BIT;
}
}
@Override
public final boolean isBINDelta() {
assert(isUpperIN() || isLatchOwner());
return (flags & IN_DELTA_BIT) != 0;
}
/*
* This version of isBINDelta() takes a checkLatched param to allow
* for cases where it is ok to call the method without holding the
* BIN latch (e.g. in single-threaded tests, or when the BIN is not
* attached to the tree (and thus inaccessible from other threads)).
*/
@Override
public final boolean isBINDelta(boolean checkLatched) {
assert(!checkLatched || isUpperIN() || isLatchOwner());
return (flags & IN_DELTA_BIT) != 0;
}
final void setBINDelta(boolean delta) {
if (delta) {
flags |= IN_DELTA_BIT;
} else {
flags &= ~IN_DELTA_BIT;
}
}
/**
* Indicates that the BIN was fetched from disk, or loaded from the
* off-heap cache, using CacheMode.UNCHANGED, and has not been accessed
* with another CacheMode. BINs in this state should be evicted from main
* cache as soon as they are no longer referenced by a cursor. If they were
* loaded from off-heap cache, they should be stored off-heap when they are
* evicted from main. The FetchedColdOffHeap flag indicates whether the
* BIN was loaded from off-heap cache.
*/
public final boolean getFetchedCold() {
return (flags & IN_FETCHED_COLD_BIT) != 0;
}
/** @see #getFetchedCold() */
public final void setFetchedCold(boolean val) {
if (val) {
flags |= IN_FETCHED_COLD_BIT;
} else {
flags &= ~IN_FETCHED_COLD_BIT;
}
}
/** @see #getFetchedCold() */
public final boolean getFetchedColdOffHeap() {
return (flags & IN_FETCHED_COLD_OFFHEAP_BIT) != 0;
}
/** @see #getFetchedCold() */
public final void setFetchedColdOffHeap(boolean val) {
if (val) {
flags |= IN_FETCHED_COLD_OFFHEAP_BIT;
} else {
flags &= ~IN_FETCHED_COLD_OFFHEAP_BIT;
}
}
public final boolean getRecalcToggle() {
return (flags & IN_RECALC_TOGGLE_BIT) != 0;
}
public final void setRecalcToggle(boolean toggle) {
if (toggle) {
flags |= IN_RECALC_TOGGLE_BIT;
} else {
flags &= ~IN_RECALC_TOGGLE_BIT;
}
}
public final boolean isRoot() {
return (flags & IN_IS_ROOT_BIT) != 0;
}
final void setIsRoot(boolean isRoot) {
setIsRootFlag(isRoot);
setDirty(true);
}
private void setIsRootFlag(boolean isRoot) {
if (isRoot) {
flags |= IN_IS_ROOT_BIT;
} else {
flags &= ~IN_IS_ROOT_BIT;
}
}
public final boolean hasCachedChildrenFlag() {
return (flags & IN_HAS_CACHED_CHILDREN_BIT) != 0;
}
private void setHasCachedChildrenFlag(boolean value) {
if (value) {
flags |= IN_HAS_CACHED_CHILDREN_BIT;
} else {
flags &= ~IN_HAS_CACHED_CHILDREN_BIT;
}
}
public final boolean isInPri2LRU() {
return (flags & IN_PRI2_LRU_BIT) != 0;
}
/* public for unit tests */
public final void setInPri2LRU(boolean value) {
if (value) {
flags |= IN_PRI2_LRU_BIT;
} else {
flags &= ~IN_PRI2_LRU_BIT;
}
}
public boolean isExpirationInHours() {
return (flags & IN_EXPIRATION_IN_HOURS) != 0;
}
void setExpirationInHours(boolean value) {
if (value) {
flags |= IN_EXPIRATION_IN_HOURS;
} else {
flags &= ~IN_EXPIRATION_IN_HOURS;
}
}
/**
* @return the identifier key for this node.
*/
public final byte[] getIdentifierKey() {
return identifierKey;
}
/**
* Set the identifier key for this node.
*
* @param key - the new identifier key for this node.
*
* @param makeDirty should normally be true, but may be false when an
* expired slot containing the identifier key has been deleted.
*/
public final void setIdentifierKey(byte[] key, boolean makeDirty) {
assert(!isBINDelta());
/*
* The identifierKey is "intentionally" not kept track of in the
* memory budget. If we did, then it would look like this:
int oldIDKeySz = (identifierKey == null) ?
0 :
MemoryBudget.byteArraySize(identifierKey.length);
int newIDKeySz = (key == null) ?
0 :
MemoryBudget.byteArraySize(key.length);
updateMemorySize(newIDKeySz - oldIDKeySz);
*/
identifierKey = key;
if (makeDirty) {
setDirty(true);
}
}
/**
* @return the number of entries in this node.
*/
public final int getNEntries() {
return nEntries;
}
/**
* @return the maximum number of entries in this node.
*
* Overridden by TestIN in INEntryTestBase.java
*/
public int getMaxEntries() {
return entryStates.length;
}
public final byte getState(int idx) {
return entryStates[idx];
}
/**
* @return true if the idx'th entry is dirty.
*/
public final boolean isDirty(int idx) {
return ((entryStates[idx] & EntryStates.DIRTY_BIT) != 0);
}
/**
* @return true if the idx'th entry has been deleted, although the
* transaction that performed the deletion may not be committed.
*/
public final boolean isEntryPendingDeleted(int idx) {
return ((entryStates[idx] & EntryStates.PENDING_DELETED_BIT) != 0);
}
/**
* Set pendingDeleted to true.
*/
public final void setPendingDeleted(int idx) {
entryStates[idx] |= EntryStates.PENDING_DELETED_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/**
* Set pendingDeleted to false.
*/
final void clearPendingDeleted(int idx) {
entryStates[idx] &= EntryStates.CLEAR_PENDING_DELETED_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/**
* @return true if the idx'th entry is deleted for sure. If a transaction
* performed the deletion, it has been committed.
*/
public final boolean isEntryKnownDeleted(int idx) {
return ((entryStates[idx] & EntryStates.KNOWN_DELETED_BIT) != 0);
}
/**
* Set KD flag to true and clear the PD flag (PD does not need to be on
* if KD is on).
*/
public final void setKnownDeleted(int idx) {
assert(isBIN());
entryStates[idx] |= EntryStates.KNOWN_DELETED_BIT;
entryStates[idx] &= EntryStates.CLEAR_PENDING_DELETED_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/**
* Set knownDeleted flag to true and evict the child LN if cached. The
* child LN is evicted to save memory, since it will never be fetched
* again.
*/
public final void setKnownDeletedAndEvictLN(int index) {
assert(isBIN());
setKnownDeleted(index);
LN oldLN = (LN) getTarget(index);
if (oldLN != null) {
updateMemorySize(oldLN, null /* newNode */);
}
setTarget(index, null);
}
/**
* Set knownDeleted to false.
*/
final void clearKnownDeleted(int idx) {
assert(isBIN());
entryStates[idx] &= EntryStates.CLEAR_KNOWN_DELETED_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/*
* In the future we may want to move the following static methods to an
* EntryState utility class and share all state bit twiddling among IN,
* ChildReference, and DeltaInfo.
*/
/**
* Returns true if the given state is known deleted.
*/
static boolean isStateKnownDeleted(byte state) {
return ((state & EntryStates.KNOWN_DELETED_BIT) != 0);
}
/**
* Returns true if the given state is pending deleted.
*/
static boolean isStatePendingDeleted(byte state) {
return ((state & EntryStates.PENDING_DELETED_BIT) != 0);
}
/**
* Return true if the LN at the given slot is embedded.
*/
public final boolean isEmbeddedLN(int idx) {
return ((entryStates[idx] & EntryStates.EMBEDDED_LN_BIT) != 0);
}
public static boolean isEmbeddedLN(byte state) {
return ((state & EntryStates.EMBEDDED_LN_BIT) != 0);
}
/**
* Set embeddedLN to true.
*/
private void setEmbeddedLN(int idx) {
entryStates[idx] |= EntryStates.EMBEDDED_LN_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/**
* Set embeddedLN to false.
*/
private void clearEmbeddedLN(int idx) {
entryStates[idx] &= EntryStates.CLEAR_EMBEDDED_LN_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/**
* Return true if the LN at the given slot is an embedded LN with no data.
*/
public final boolean isNoDataLN(int idx) {
return ((entryStates[idx] & EntryStates.NO_DATA_LN_BIT) != 0);
}
public static boolean isNoDataLN(byte state) {
return ((state & EntryStates.NO_DATA_LN_BIT) != 0);
}
/**
* Set noDataLN to true.
*/
void setNoDataLN(int idx) {
entryStates[idx] |= EntryStates.NO_DATA_LN_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/**
* Set noDataLN to false.
*/
private void clearNoDataLN(int idx) {
entryStates[idx] &= EntryStates.CLEAR_NO_DATA_LN_BIT;
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
/*
* Return true if the slot contains embedded data, i.e., it holds an
* embedded LN that is not a no-data LN.
*/
public final boolean haveEmbeddedData(int idx) {
return (isEmbeddedLN(idx) && !isNoDataLN(idx));
}
/* For unit testing */
public final int getNumEmbeddedLNs() {
int res = 0;
for (int i = 0; i < getNEntries(); ++i) {
if (isEmbeddedLN(i)) {
++res;
}
}
return res;
}
/* For unit testing */
public final INKeyRep getKeyVals() {
return entryKeys;
}
public final byte[] getKeyPrefix() {
return keyPrefix;
}
/*
* For unit testing only
*/
public final boolean hasKeyPrefix() {
return keyPrefix != null;
}
/* This has default protection for access by the unit tests. */
final void setKeyPrefix(byte[] keyPrefix) {
assert databaseImpl != null;
int prevLength = (this.keyPrefix == null) ? 0 : this.keyPrefix.length;
this.keyPrefix = keyPrefix;
/* Update the memory budgeting to reflect changes in the key prefix. */
int currLength = (keyPrefix == null) ? 0 : keyPrefix.length;
updateMemorySize(prevLength, currLength);
}
/**
* Return the idx'th key. If prefixing is enabled, construct a new byte[]
* containing the prefix and suffix. If prefixing is not enabled, just
* return the current byte[] in entryKeys.
*/
public final byte[] getKey(int idx) {
assert idx < nEntries;
byte[] key = entryKeys.getFullKey(
keyPrefix, idx, haveEmbeddedData(idx));
assert(key != null);
return key;
}
public final byte[] getData(int idx) {
if (haveEmbeddedData(idx)) {
return entryKeys.getData(idx);
}
if (isNoDataLN(idx)) {
return Key.EMPTY_KEY;
}
return null;
}
/**
* Returns the size of the key that is stored persistently, which will be
* the combined key-data for an embedded LN or a record in a duplicates DB.
*/
int getStoredKeySize(int idx) {
return entryKeys.size(idx);
}
/**
* Updates the key in the idx-th slot of this BIN, if the DB allows key
* updates and the new key is not identical to the current key in the slot.
* It also updates the data (if any) that is embedded with the key in the
* idx'th slot, or embeds new data in that slot, if the "data" param is
* non-null, or removes embedded data, if "data" is null. Finally, it
* sets the EMBEDDED_LN_BIT and NO_DATA_LN_BIT flags in the slot's state.
*
* @param key is the key to set in the slot and is the LN key.
*
* @param data If the data portion of a record must be embedded in this
* BIN, "data" stores the record's data. Null otherwise. See also comment
* for the keyEntries field.
*
* @return true if a multi-slot change was made and the complete IN memory
* size must be updated.
*/
private boolean updateLNSlotKey(int idx, byte[] key, byte[] data) {
assert(isBIN());
boolean haveEmbeddedData = haveEmbeddedData(idx);
if (data == null) {
if (isEmbeddedLN(idx)) {
clearEmbeddedLN(idx);
clearNoDataLN(idx);
}
} else {
if (!isEmbeddedLN(idx)) {
setEmbeddedLN(idx);
}
if (data.length == 0) {
setNoDataLN(idx);
} else {
clearNoDataLN(idx);
}
}
/*
* The new key may be null if a dup LN was deleted, in which case there
* is no need to update it. There is no need to compare keys if there
* is no comparator configured, since a key cannot be changed when the
* default comparator is used.
*/
if (key != null &&
(databaseImpl.allowsKeyUpdates() ||
DupConvert.needsConversion(databaseImpl)) &&
!Arrays.equals(key, getKey(idx))) {
setDirty(true);
return setKey(idx, key, data, false);
} else if (haveEmbeddedData) {
/*
* The key does not change, but the slot contains embedded data,
* which must now either be removed (if data == null or
* data.length == 0) or updated.
* TODO #21488: update the data only if it actually changes.
*/
setDirty(true);
entryStates[idx] |= EntryStates.DIRTY_BIT;
INKeyRep.Type oldRepType = entryKeys.getType();
entryKeys = entryKeys.setData(idx, data, this);
return oldRepType != entryKeys.getType();
} else if (data != null && data.length != 0) {
/*
* The key does not change, but we now have to embed data in a slot
* that does not currently have embedded data.
*/
setDirty(true);
entryStates[idx] |= EntryStates.DIRTY_BIT;
key = entryKeys.getKey(idx, false);
INKeyRep.Type oldRepType = entryKeys.getType();
entryKeys = entryKeys.set(idx, key, data, this);
return oldRepType != entryKeys.getType();
} else {
return false;
}
}
/*
* Convenience wrapper for setKey() method below
*/
private boolean insertKey(
int idx,
byte[] key,
byte[] data) {
/*
* Set the id key when inserting the first entry. This is important
* when compression removes all entries from a BIN, and then an entry
* is inserted before the empty BIN is purged.
*/
if (nEntries == 1 && !isBINDelta()) {
setIdentifierKey(key, true /*makeDirty*/);
}
return setKey(idx, key, data, true);
}
// TODO re-enable this and figure out why it is firing
private boolean idKeyIsSlotKey() {
if (true) {
return true;
}
if (!isBIN() || nEntries == 0) {
return true;
}
for (int i = 0; i < nEntries; i += 1) {
if (entryKeys.compareKeys(
identifierKey, keyPrefix, i, haveEmbeddedData(i),
databaseImpl.getKeyComparator()) == 0) {
return true;
}
}
return false;
}
/*
* Convenience wrapper for setKey() method below. It is used for
* upper INs only, so no need to worry about the EMBEDDED_LN_BIT
* and NO_DATA_LN_BIT flags.
*/
private boolean updateKey(
int idx,
byte[] key,
byte[] data) {
return setKey(idx, key, data, false);
}
/**
* This method inserts or updates a key at a given slot. In either case,
* the associated embedded data (if any) is inserted or updated as well,
* and the key prefix is adjusted, if necessary.
*
* In case of insertion (indicated by a true value for the isInsertion
* param), it is assumed that the idx slot does not store any valid info,
* so any change to the key prefix (if any) is due to the insertion of
* this new key and not to the removal of the current key at the idx
* slot.
*
* In case of update, the method does not check if the current key is
* indeed different from the new key; it just updates the key
* unconditionally. If the slot has embedded data, that data will also
* be updated (if the data param is not null), or be removed (if the data
* param is null). If the slot does not have embedded data and the data
* param is not null, the given data will be embedded.
*
* Note: For BINs, the maintenance of the EMBEDDED_LN_BIT and NO_DATA_LN_BIT
* is done by the callers of this method.
*
* @param data If the data portion of a record must be embedded in this
* BIN, "data" stores the record's data. Null otherwise. See also comment
* for the keyEntries field.
*
* @return true if a multi-slot change was made and the complete IN memory
* size must be updated.
*/
public boolean setKey(
int idx,
byte[] key,
byte[] data,
boolean isInsertion) {
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
/*
* Only compute key prefix if prefixing is enabled and there's an
* existing prefix.
*/
if (databaseImpl.getKeyPrefixing() && keyPrefix != null) {
int newPrefixLen = Key.getKeyPrefixLength(
keyPrefix, keyPrefix.length, key);
if (newPrefixLen < keyPrefix.length) {
/*
* The new key doesn't share the current prefix, so recompute
* the prefix and readjust all the existing suffixes.
*/
byte[] newPrefix = (isInsertion ?
Key.createKeyPrefix(keyPrefix, key) :
computeKeyPrefix(idx));
if (newPrefix != null) {
/* Take the new key into consideration for new prefix. */
newPrefix = Key.createKeyPrefix(newPrefix, key);
}
recalcSuffixes(newPrefix, key, data, idx);
return true;
} else {
INKeyRep.Type prevRepType = entryKeys.getType();
byte[] suffix = computeKeySuffix(keyPrefix, key);
entryKeys = entryKeys.set(idx, suffix, data, this);
return prevRepType != entryKeys.getType();
}
} else if (keyPrefix != null) {
/*
* Key prefixing has been turned off on this database, but there
* are existing prefixes. Remove prefixes for this IN.
*/
recalcSuffixes(null, key, data, idx);
return true;
} else {
INKeyRep.Type oldRepType = entryKeys.getType();
entryKeys = entryKeys.set(idx, key, data, this);
return oldRepType != entryKeys.getType();
}
}
/*
* Given 2 byte arrays, "prefix" and "key", where "prefix" is or stores
* a prefix of "key", allocate and return another byte array that stores
* the suffix of "key" w.r.t. "prefix".
*/
private static byte[] computeKeySuffix(byte[] prefix, byte[] key) {
int prefixLen = (prefix == null ? 0 : prefix.length);
if (prefixLen == 0) {
return key;
}
int suffixLen = key.length - prefixLen;
byte[] ret = new byte[suffixLen];
System.arraycopy(key, prefixLen, ret, 0, suffixLen);
return ret;
}
/*
* Iterate over all keys in this IN and recalculate their suffixes based on
* newPrefix. If key and idx are supplied, it means that entry[idx] is
* about to be changed to key, so use that instead of
* entryKeys.get(idx) when computing the new suffixes. If idx is < 0
* and key is null, then recalculate suffixes for all entries in this.
*/
private void recalcSuffixes(
byte[] newPrefix,
byte[] key,
byte[] data,
int idx) {
for (int i = 0; i < nEntries; i++) {
byte[] curKey = (i == idx ? key : getKey(i));
byte[] curData = null;
if (i == idx) {
curData = data;
} else if (haveEmbeddedData(i)) {
curData = entryKeys.getData(i);
}
byte[] suffix = computeKeySuffix(newPrefix, curKey);
entryKeys = entryKeys.set(i, suffix, curData, this);
}
setKeyPrefix(newPrefix);
}
/**
* Forces computation of the key prefix, without requiring a split.
* Is public for use by DbCacheSize.
*/
public final void recalcKeyPrefix() {
assert(!isBINDelta());
recalcSuffixes(computeKeyPrefix(-1), null, null, -1);
}
/*
* Computes a key prefix based on all the keys in 'this'. Return null if
* the IN is empty or prefixing is not enabled or there is no common
* prefix for the keys.
*/
private byte[] computeKeyPrefix(int excludeIdx) {
if (!databaseImpl.getKeyPrefixing() || nEntries <= 1) {
return null;
}
int firstIdx = (excludeIdx == 0) ? 1 : 0;
byte[] curPrefixKey = getKey(firstIdx);
int prefixLen = curPrefixKey.length;
/*
* Only need to look at first and last entries when keys are ordered
* byte-by-byte. But when there is a comparator, keys are not
* necessarily ordered byte-by-byte. [#21328]
*/
boolean byteOrdered;
if (true) {
/* Disable optimization for now. Needs testing. */
byteOrdered = false;
} else {
byteOrdered = (databaseImpl.getKeyComparator() == null);
}
if (byteOrdered) {
int lastIdx = nEntries - 1;
if (lastIdx == excludeIdx) {
lastIdx -= 1;
}
if (lastIdx > firstIdx) {
byte[] lastKey = getKey(lastIdx);
int newPrefixLen = Key.getKeyPrefixLength(
curPrefixKey, prefixLen, lastKey);
if (newPrefixLen < prefixLen) {
curPrefixKey = lastKey;
prefixLen = newPrefixLen;
}
}
} else {
for (int i = firstIdx + 1; i < nEntries; i++) {
if (i == excludeIdx) {
continue;
}
byte[] curKey = getKey(i);
int newPrefixLen = Key.getKeyPrefixLength(
curPrefixKey, prefixLen, curKey);
if (newPrefixLen < prefixLen) {
curPrefixKey = curKey;
prefixLen = newPrefixLen;
}
}
}
byte[] ret = new byte[prefixLen];
System.arraycopy(curPrefixKey, 0, ret, 0, prefixLen);
return ret;
}
/*
* For debugging.
*/
final boolean verifyKeyPrefix() {
byte[] computedKeyPrefix = computeKeyPrefix(-1);
if (keyPrefix == null) {
return computedKeyPrefix == null;
}
if (computedKeyPrefix == null ||
computedKeyPrefix.length < keyPrefix.length) {
System.out.println("VerifyKeyPrefix failed");
System.out.println(dumpString(0, false));
return false;
}
for (int i = 0; i < keyPrefix.length; i++) {
if (keyPrefix[i] != computedKeyPrefix[i]) {
System.out.println("VerifyKeyPrefix failed");
System.out.println(dumpString(0, false));
return false;
}
}
return true;
}
/**
* Returns whether the given key is greater than or equal to the first key
* in the IN and less than or equal to the last key in the IN. This method
* is used to determine whether a key to be inserted belongs in this IN,
* without doing a tree search. If false is returned it is still possible
* that the key belongs in this IN, but a tree search must be performed to
* find out.
*/
public final boolean isKeyInBounds(byte[] key) {
assert(!isBINDelta());
if (nEntries < 2) {
return false;
}
Comparator<byte[]> comparator = getKeyComparator();
int cmp;
/* Compare key given to my first key. */
cmp = entryKeys.compareKeys(
key, keyPrefix, 0, haveEmbeddedData(0), comparator);
if (cmp < 0) {
return false;
}
/* Compare key given to my last key. */
int idx = nEntries - 1;
cmp = entryKeys.compareKeys(
key, keyPrefix, idx, haveEmbeddedData(idx), comparator);
return cmp <= 0;
}
/**
* @return the LSN of the idx'th entry.
*/
public final long getLsn(int idx) {
if (entryLsnLongArray == null) {
int offset = idx << 2;
int fileOffset = getFileOffset(offset);
if (fileOffset == -1) {
return DbLsn.NULL_LSN;
} else {
return DbLsn.makeLsn((baseFileNumber +
getFileNumberOffset(offset)),
fileOffset);
}
} else {
return entryLsnLongArray[idx];
}
}
/**
* Set the LSN of the idx'th slot, mark the slot dirty, and update
* memory consumption. Throw exception if the update is not legitimate.
*/
public void setLsn(int idx, long lsn) {
setLsn(idx, lsn, true);
}
/**
* Set the LSN of the idx'th slot, mark the slot dirty, and update
* memory consumption. If "check" is true, throw exception if the
* update is not legitimate.
*/
private void setLsn(int idx, long lsn, boolean check) {
if (!check || shouldUpdateLsn(getLsn(idx), lsn)) {
int oldSize = computeLsnOverhead();
/* setLsnInternal can mutate to an array of longs. */
setLsnInternal(idx, lsn);
updateMemorySize(computeLsnOverhead() - oldSize);
entryStates[idx] |= EntryStates.DIRTY_BIT;
setDirty(true);
}
}
/*
* Set the LSN of the idx'th slot. If the current storage for LSNs is the
* compact one, mutate it to the non-compact one, if necessary.
*/
final void setLsnInternal(int idx, long value) {
/* Will implement this in the future. Note, don't adjust if mutating.*/
//maybeAdjustCapacity(offset);
if (entryLsnLongArray != null) {
entryLsnLongArray[idx] = value;
return;
}
int offset = idx << 2;
if (value == DbLsn.NULL_LSN) {
setFileNumberOffset(offset, (byte) 0);
setFileOffset(offset, -1);
return;
}
long thisFileNumber = DbLsn.getFileNumber(value);
if (baseFileNumber == -1) {
/* First entry. */
baseFileNumber = thisFileNumber;
setFileNumberOffset(offset, (byte) 0);
} else {
if (thisFileNumber < baseFileNumber) {
if (!adjustFileNumbers(thisFileNumber)) {
mutateToLongArray(idx, value);
return;
}
baseFileNumber = thisFileNumber;
}
long fileNumberDifference = thisFileNumber - baseFileNumber;
if (fileNumberDifference > Byte.MAX_VALUE) {
mutateToLongArray(idx, value);
return;
}
setFileNumberOffset(
offset, (byte) (thisFileNumber - baseFileNumber));
//assert getFileNumberOffset(offset) >= 0;
}
int fileOffset = (int) DbLsn.getFileOffset(value);
if (fileOffset > MAX_FILE_OFFSET) {
mutateToLongArray(idx, value);
return;
}
setFileOffset(offset, fileOffset);
//assert getLsn(offset) == value;
}
private boolean adjustFileNumbers(long newBaseFileNumber) {
long oldBaseFileNumber = baseFileNumber;
for (int i = 0;
i < entryLsnByteArray.length;
i += BYTES_PER_LSN_ENTRY) {
if (getFileOffset(i) == -1) {
continue;
}
long curEntryFileNumber =
oldBaseFileNumber + getFileNumberOffset(i);
long newCurEntryFileNumberOffset =
(curEntryFileNumber - newBaseFileNumber);
if (newCurEntryFileNumberOffset > Byte.MAX_VALUE) {
long undoOffset = oldBaseFileNumber - newBaseFileNumber;
for (int j = i - BYTES_PER_LSN_ENTRY;
j >= 0;
j -= BYTES_PER_LSN_ENTRY) {
if (getFileOffset(j) == -1) {
continue;
}
setFileNumberOffset
(j, (byte) (getFileNumberOffset(j) - undoOffset));
//assert getFileNumberOffset(j) >= 0;
}
return false;
}
setFileNumberOffset(i, (byte) newCurEntryFileNumberOffset);
//assert getFileNumberOffset(i) >= 0;
}
return true;
}
private void setFileNumberOffset(int offset, byte fileNumberOffset) {
entryLsnByteArray[offset] = fileNumberOffset;
}
private byte getFileNumberOffset(int offset) {
return entryLsnByteArray[offset];
}
private void setFileOffset(int offset, int fileOffset) {
put3ByteInt(offset + 1, fileOffset);
}
private int getFileOffset(int offset) {
return get3ByteInt(offset + 1);
}
private void put3ByteInt(int offset, int value) {
entryLsnByteArray[offset++] = (byte) value;
entryLsnByteArray[offset++] = (byte) (value >>> 8);
entryLsnByteArray[offset] = (byte) (value >>> 16);
}
private int get3ByteInt(int offset) {
int ret = (entryLsnByteArray[offset++] & 0xFF);
ret += (entryLsnByteArray[offset++] & 0xFF) << 8;
ret += (entryLsnByteArray[offset] & 0xFF) << 16;
if (ret == THREE_BYTE_NEGATIVE_ONE) {
ret = -1;
}
return ret;
}
private void mutateToLongArray(int idx, long value) {
int nElts = entryLsnByteArray.length >> 2;
long[] newArr = new long[nElts];
for (int i = 0; i < nElts; i++) {
newArr[i] = getLsn(i);
}
newArr[idx] = value;
entryLsnLongArray = newArr;
entryLsnByteArray = null;
}
/**
* For a deferred write database, ensure that information is not lost when
* a new LSN is assigned. Also ensures that a transient LSN is not
* accidentally assigned to a persistent entry.
*
* Because this method uses strict checking, prepareForSlotReuse must
* sometimes be called when a new logical entry is being placed in a slot,
* e.g., during an IN split or an LN slot reuse.
*
* The following transition is a NOOP and the LSN is not set:
* Any LSN to same value.
* The following transitions are allowed and cause the LSN to be set:
* Null LSN to transient LSN
* Null LSN to persistent LSN
* Transient LSN to persistent LSN
* Persistent LSN to new persistent LSN
* The following transitions should not occur and throw an exception:
* Transient LSN to null LSN
* Transient LSN to new transient LSN
* Persistent LSN to null LSN
* Persistent LSN to transient LSN
*
* The above imply that a transient or null LSN can overwrite only a null
* LSN.
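*
* As a hedged illustration of the rules above: in a deferred-write
* database, overwriting a null LSN with a transient LSN is allowed and
* returns true, while overwriting a persistent LSN with a transient
* one throws unexpectedState.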
*/
private final boolean shouldUpdateLsn(long oldLsn, long newLsn) {
/* Save a little computation in packing/updating an unchanged LSN. */
if (oldLsn == newLsn) {
return false;
}
/* The rules for a new null LSN can be broken in a read-only env. */
if (newLsn == DbLsn.NULL_LSN && getEnv().isReadOnly()) {
return true;
}
/* Enforce LSN update rules. Assume lsn != oldLsn. */
if (databaseImpl.isDeferredWriteMode()) {
if (oldLsn != DbLsn.NULL_LSN && DbLsn.isTransientOrNull(newLsn)) {
throw unexpectedState(
"DeferredWrite LSN update not allowed" +
" oldLsn = " + DbLsn.getNoFormatString(oldLsn) +
" newLsn = " + DbLsn.getNoFormatString(newLsn));
}
} else {
if (DbLsn.isTransientOrNull(newLsn)) {
throw unexpectedState(
"LSN update not allowed" +
" oldLsn = " + DbLsn.getNoFormatString(oldLsn) +
" newLsn = " + DbLsn.getNoFormatString(newLsn));
}
}
return true;
}
/* For unit tests. */
final long[] getEntryLsnLongArray() {
return entryLsnLongArray;
}
/* For unit tests. */
final byte[] getEntryLsnByteArray() {
return entryLsnByteArray;
}
/* For unit tests. */
final void initEntryLsn(int capacity) {
entryLsnLongArray = null;
entryLsnByteArray = new byte[capacity << 2];
baseFileNumber = -1;
}
/* Will implement this in the future. Note, don't adjust if mutating.*/
/***
private void maybeAdjustCapacity(int offset) {
if (entryLsnLongArray == null) {
int bytesNeeded = offset + BYTES_PER_LSN_ENTRY;
int currentBytes = entryLsnByteArray.length;
if (currentBytes < bytesNeeded) {
int newBytes = bytesNeeded +
(GROWTH_INCREMENT * BYTES_PER_LSN_ENTRY);
byte[] newArr = new byte[newBytes];
System.arraycopy(entryLsnByteArray, 0, newArr, 0,
currentBytes);
entryLsnByteArray = newArr;
for (int i = currentBytes;
i < newBytes;
i += BYTES_PER_LSN_ENTRY) {
setFileNumberOffset(i, (byte) 0);
setFileOffset(i, -1);
}
}
} else {
int currentEntries = entryLsnLongArray.length;
int idx = offset >> 2;
if (currentEntries < idx + 1) {
int newEntries = idx + GROWTH_INCREMENT;
long[] newArr = new long[newEntries];
System.arraycopy(entryLsnLongArray, 0, newArr, 0,
currentEntries);
entryLsnLongArray = newArr;
for (int i = currentEntries; i < newEntries; i++) {
entryLsnLongArray[i] = DbLsn.NULL_LSN;
}
}
}
}
***/
/**
* The last logged size is not stored for UINs.
*/
boolean isLastLoggedSizeStored(int idx) {
return false;
}
boolean mayHaveLastLoggedSizeStored() {
return false;
}
/**
* The last logged size is not stored for UINs.
*/
public void setLastLoggedSize(int idx, int lastLoggedSize) {
}
/**
* The last logged size is not stored for UINs.
*/
public void clearLastLoggedSize(int idx) {
}
/**
* The last logged size is not stored for UINs.
*/
void setLastLoggedSizeUnconditional(int idx, int lastLoggedSize) {
}
/**
* The last logged size is not stored for UINs.
*/
public int getLastLoggedSize(int idx) {
return 0;
}
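/*
* Off-heap BIN IDs are stored biased by +1 in offHeapBINIds, so that
* the rep's default value of 0 can mean "no off-heap ID". See the +1
* in setOffHeapBINId and the -1 in getOffHeapBINId below.
*/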
public void setOffHeapBINId(int idx,
int val,
boolean pri2,
boolean dirty) {
assert getNormalizedLevel() == 2;
assert val >= 0;
setOffHeapBINPri2(idx, pri2);
setOffHeapBINDirty(idx, dirty);
final long newVal = val + 1;
final long oldVal = offHeapBINIds.get(idx);
if (oldVal == newVal) {
return;
}
assert oldVal == 0;
offHeapBINIds = offHeapBINIds.set(idx, newVal, this);
}
public void clearOffHeapBINId(int idx) {
assert getNormalizedLevel() == 2;
setOffHeapBINPri2(idx, false);
setOffHeapBINDirty(idx, false);
final long oldVal = offHeapBINIds.get(idx);
if (oldVal == 0) {
return;
}
offHeapBINIds = offHeapBINIds.set(idx, 0, this);
if (getInListResident() &&
getNormalizedLevel() == 2 &&
offHeapBINIds.isEmpty()) {
getEvictor().moveToPri1LRU(this);
}
}
public int getOffHeapBINId(int idx) {
return ((int) offHeapBINIds.get(idx)) - 1;
}
public boolean hasOffHeapBINIds() {
return !offHeapBINIds.isEmpty();
}
public long getOffHeapBINIdsMemorySize() {
return offHeapBINIds.getMemorySize();
}
private void setOffHeapBINDirty(int idx, boolean val) {
if (val) {
entryStates[idx] |= EntryStates.OFFHEAP_DIRTY_BIT;
} else {
entryStates[idx] &= EntryStates.CLEAR_OFFHEAP_DIRTY_BIT;
}
}
public boolean isOffHeapBINDirty(int idx) {
return (entryStates[idx] & EntryStates.OFFHEAP_DIRTY_BIT) != 0;
}
private void setOffHeapBINPri2(int idx, boolean val) {
if (val) {
entryStates[idx] |= EntryStates.OFFHEAP_PRI2_BIT;
} else {
entryStates[idx] &= EntryStates.CLEAR_OFFHEAP_PRI2_BIT;
}
}
public boolean isOffHeapBINPri2(int idx) {
return (entryStates[idx] & EntryStates.OFFHEAP_PRI2_BIT) != 0;
}
public final INTargetRep getTargets() {
return entryTargets;
}
/**
* Sets the idx'th target. No need to make dirty, that state only applies
* to key and LSN.
*
* <p>WARNING: This method does not update the memory budget. The caller
* must update the budget.</p>
*/
void setTarget(int idx, Node target) {
assert isLatchExclusiveOwner() :
"Not latched for write " + getClass().getName() +
" id=" + getNodeId();
final Node curChild = entryTargets.get(idx);
entryTargets = entryTargets.set(idx, target, this);
if (target != null && target.isIN()) {
((IN) target).setParent(this);
}
if (isUpperIN()) {
if (target == null) {
/*
* If this UIN has just lost its last cached child, set its
* hasCachedChildren flag to false and put it back to the
* LRU list.
*/
if (curChild != null &&
hasCachedChildrenFlag() &&
!hasCachedChildren()) {
setHasCachedChildrenFlag(false);
if (!isDIN()) {
if (traceLRU) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
" setTarget(): " +
" Adding UIN " + getNodeId() +
" to LRU after detaching chld " +
((IN) curChild).getNodeId());
}
getEvictor().addBack(this);
}
}
} else {
if (curChild == null &&
!hasCachedChildrenFlag()) {
setHasCachedChildrenFlag(true);
if (traceLRU) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
" setTarget(): " +
" Removing UIN " + getNodeId() +
" after attaching child " +
((IN) target).getNodeId());
}
getEvictor().remove(this);
}
}
}
}
/**
* Return the idx'th target.
*
* This method does not load children from off-heap cache, so it always
* returns null when the child is not in main cache. Note that when
* children are INs (this is not a BIN), a null return does not imply
* that the child is non-dirty, because dirty BINs are stored off-heap.
* To fetch the current version from off-heap cache in
* that case, call loadIN instead.
*/
public final Node getTarget(int idx) {
return entryTargets.get(idx);
}
/**
* Returns the idx-th child of "this" upper IN, fetching the child from
* the log and attaching it to its parent if it is not already attached.
* This method is used during tree searches.
*
* On entry, the parent must be latched already.
*
* If the child must be fetched from the log, the parent is unlatched.
* After the disk read is done, the parent is relatched. However, due to
* splits, it may not be the correct parent anymore. If so, the method
* will return null, and the caller is expected to restart the tree search.
*
* On return, the parent will be latched, unless null is returned or an
* exception is thrown.
*
* The "searchKey" param is the key that the caller is looking for. It is
* used by this method in determining if, after a disk read, "this" is
* still the correct parent for the child. "searchKey" may be null if the
* caller is doing a LEFT or RIGHT search.
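*
* A typical calling pattern (a sketch) is:
*
*   IN child = parent.fetchINWithNoLatch(idx, searchKey);
*   if (child == null) {
*       // "this" may no longer be the correct parent;
*       // restart the tree search from the root.
*   }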
*/
final IN fetchINWithNoLatch(int idx,
byte [] searchKey) {
return fetchINWithNoLatch(idx, searchKey, null);
}
/**
* This variant of fetchINWithNoLatch() takes a SearchResult, instead of
* an idx (it is used by Tree.getParentINForChildIN()). The ordinal number
* of the child to fetch is specified by result.index. result.index will
* be adjusted by this method if, after a disk read, the ordinal number
* of the child changes due to insertions, deletions or splits in the
* parent.
*/
final IN fetchINWithNoLatch(SearchResult result,
byte [] searchKey) {
return fetchINWithNoLatch(result.index, searchKey, result);
}
/**
* Provides the implementation of the above two methods.
*/
private IN fetchINWithNoLatch(
int idx,
byte [] searchKey,
SearchResult result) {
assert(isUpperIN());
assert(isLatchOwner());
final EnvironmentImpl envImpl = getEnv();
final OffHeapCache ohCache = envImpl.getOffHeapCache();
boolean isMiss = false;
boolean success = false;
IN child = (IN)entryTargets.get(idx);
if (child == null) {
final long lsn = getLsn(idx);
if (lsn == DbLsn.NULL_LSN) {
throw unexpectedState(makeFetchErrorMsg(
"NULL_LSN in upper IN", lsn, idx));
}
/*
* For safety we must get a copy of the BIN off-heap bytes while
* latched, but we can materialize the bytes while unlatched
* (further below) to reduce the work done while latched.
*/
byte[] ohBytes = null;
if (getNormalizedLevel() == 2) {
ohBytes = ohCache.getBINBytes(this, idx);
}
pin();
try {
releaseLatch();
TestHookExecute.doHookIfSet(fetchINHook);
if (ohBytes != null) {
child = ohCache.materializeBIN(envImpl, ohBytes);
} else {
final WholeEntry wholeEntry = envImpl.getLogManager().
getLogEntryAllowInvisibleAtRecovery(
lsn, getLastLoggedSize(idx));
final LogEntry logEntry = wholeEntry.getEntry();
child = (IN) logEntry.getResolvedItem(databaseImpl);
isMiss = true;
}
latch(CacheMode.UNCHANGED);
/*
* The following if statement relies on splits being logged
* immediately, or more precisely, the split node and its
* new sibling being logged immediately, while both siblings
* and their parent are latched exclusively. The reason for
* this is as follows:
*
* Let K be the search key. If we are doing a left-deep or
* right-deep search, K is -INF or +INF respectively.
*
* Let P be the parent IN (i.e., "this") and S be the slot at
* the idx position before P was unlatched above. Here, we
* view slots as independent objects, not identified by their
* position in an IN but by some unique (imaginary) and
* immutable id assigned to the slot when it is first inserted
* into an IN.
*
* Before unlatching P, S was the correct slot to follow down
* the tree looking for K. After P is unlatched and then
* relatched, let S' be the slot at the idx position, if P
* still has an idx position. We consider the following 2 cases:
*
* 1. S' exists and S'.LSN == S.LSN. Then S and S' are the same
* (logical) slot (because two different slots can never cover
* overlapping ranges of keys, and as a result, can never point
* to the same LSN). Then, S is still the correct slot to take
* down the tree, unless the range of keys covered by S has
* shrunk while P was unlatched. But the only way for S's key
* range to shrink is for its child IN to split, which could
* not have happened because if it did, the before and after
* LSNs of S would be different, given that splits are logged
* immediately. We conclude that the set of keys covered by
* S after P is relatched is the same or a superset of the keys
* covered by S before P was unlatched, and thus S (at the idx
* position) is still the correct slot to follow.
*
* 2. There is no idx position in P or S'.LSN != S.LSN. In
* this case we cannot be sure if S' (if it exists) is the
* correct slot to follow. So, we (re)search for K in P
* to check if P is still the correct parent and find the
* correct slot to follow. If this search lands on the 1st or
* last slot in P, we may return null because using the key
* info contained in P only, we do not know the full range of
* keys covered by those two slots. If null is returned, the
* caller is expected to restart the tree search from the root.
*
* Notice that the if conditions are necessary before calling
* findEntry(). Without them, we could get into an infinite
* loop of search re-tries in the scenario where nothing changes
* in the tree and findEntry always lands on the 1st or last
* slot in P. The conditions guarantee that we may restart the
* tree search only if something changes with S while P is
* unlatched (S moves to a different position or a different
* IN or it points to a different LSN).
*
* Notice also that if P does not split while it is unlatched,
* the range of keys covered by P does not change either. This
* implies that the correct slot to follow *must* be inside P,
* and as a result, the 1st and last slots in P can be trusted.
* Unfortunately, however, we have no way of detecting reliably
* whether P splits or not.
*
* Special care for DBs in DW mode:
*
* For DBs in DW mode, special care must be taken because
* splits are not immediately logged. So, for DW DBs, to avoid
* a call to findEntry() we require that not only S'.LSN ==
* S.LSN, but also that the child is not cached. These 2
* conditions together guarantee that the child did not split
* while P was unlatched, because if the child did split, it
* was fetched and cached first, so after P is relatched,
* either the child would be still cached, or if it was evicted
* after the split, S.LSN would have changed.
*/
if (idx >= nEntries ||
getLsn(idx) != lsn ||
(databaseImpl.isDeferredWriteMode() &&
entryTargets.get(idx) != null)) {
if (searchKey == null) {
return null;
}
idx = findEntry(searchKey, false, false);
if ((idx == 0 || idx == nEntries - 1) &&
!isKeyInBounds(searchKey)) {
return null;
}
}
if (result != null) {
result.index = idx;
}
/*
* "this" is still the correct parent and "idx" points to the
* correct slot to follow for the search down the tree. But
* what we fetched from the log may be out-of-date by now
* (because it was fetched and then updated by other threads)
* or it may not be the correct child anymore ("idx" was
* changed by the findEntry() call above). We check 5 cases:
*
* (a) There is already a cached child at the "idx" position.
* In this case, we return whatever is there because it has to
* be the most recent version of the appropriate child node.
* This is true even when a split or reverse split occurred.
* The check for isKeyInBounds above is critical in that case.
*
* (b) There is no cached child at the "idx" slot, but the slot
* LSN is not the same as the LSN we read from the log. This is
* the case if "idx" was changed by findEntry() or other
* threads fetched the same child as this thread, updated it,
* and then evicted it. The child we fetched is obsolete and
* should not be attached. For simplicity, we just return null
* in this (quite rare) case.
*
* (c) We loaded the BIN from off-heap cache and, similar to
* case (b), another thread has loaded the same child, modified
* it, and then evicted it, placing it off-heap again. Its LSN
* did not change because it wasn't logged. We determine
* whether the off-heap BIN has changed, and if so then
* null is returned. This is also rare.
*
* (d) The child was loaded from disk (not off-heap cache) but
* an off-heap cache entry for this BIN has appeared. Another
* thread loaded the BIN from disk and then it was moved
* off-heap, possibly after it was modified. We should use the
* off-heap version and for simplicity we return null. This is
* also rare.
*
* (e) Otherwise, we attach the fetched/loaded child to the
* parent.
*/
if (entryTargets.get(idx) != null) {
child = (IN) entryTargets.get(idx);
} else if (getLsn(idx) != lsn) {
return null;
} else if (ohBytes != null &&
ohCache.haveBINBytesChanged(this, idx, ohBytes)) {
return null;
} else if (ohBytes == null &&
getOffHeapBINId(idx) >= 0) {
return null;
} else {
child.latchNoUpdateLRU(databaseImpl);
if (ohBytes != null) {
child.postLoadInit(this, idx);
} else {
child.postFetchInit(databaseImpl, lsn);
}
attachNode(idx, child, null);
child.releaseLatch();
}
success = true;
} catch (FileNotFoundException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
makeFetchErrorMsg(null, lsn, idx), e);
} catch (ErasedException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
"IN is erased unexpectedly, implied corruption. " +
makeFetchErrorMsg(null, lsn, idx), e);
} catch (EnvironmentFailureException e) {
e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx));
throw e;
} catch (RuntimeException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
makeFetchErrorMsg(e.toString(), lsn, idx), e);
} finally {
/*
* Release the parent latch if null is being returned. In this
* case, the parent was unlatched earlier during the disk read,
* and as a result, the caller cannot make any assumptions
* about the state of the parent.
*
* If we are returning or throwing out of this try block, the
* parent may or may not be latched. So, only release the latch
* if it is currently held.
*/
if (!success) {
if (child != null) {
child.incFetchStats(envImpl, isMiss);
}
releaseLatchIfOwner();
}
unpin();
}
}
assert(hasCachedChildren() == hasCachedChildrenFlag());
child.incFetchStats(envImpl, isMiss);
return child;
}
/**
* Returns the idx-th child of "this" upper IN, fetching the child from
* the log and attaching it to its parent if it is not already attached.
*
* On entry, the parent must be EX-latched already and it stays EX-latched
* for the duration of this method and on return (even in case of
* exceptions).
*
* @param idx The slot of the child to fetch.
*/
public IN fetchIN(int idx, CacheMode cacheMode) {
assert(isUpperIN());
if (!isLatchExclusiveOwner()) {
throw unexpectedState("EX-latch not held before fetch");
}
final EnvironmentImpl envImpl = getEnv();
final OffHeapCache ohCache = envImpl.getOffHeapCache();
boolean isMiss = false;
IN child = (IN) entryTargets.get(idx);
if (child == null) {
final long lsn = getLsn(idx);
if (lsn == DbLsn.NULL_LSN) {
throw unexpectedState(
makeFetchErrorMsg("NULL_LSN in upper IN", lsn, idx));
}
try {
byte[] ohBytes = null;
if (getNormalizedLevel() == 2) {
ohBytes = ohCache.getBINBytes(this, idx);
if (ohBytes != null) {
child = ohCache.materializeBIN(envImpl, ohBytes);
}
}
if (child == null) {
final WholeEntry wholeEntry = envImpl.getLogManager().
getLogEntryAllowInvisibleAtRecovery(
lsn, getLastLoggedSize(idx));
final LogEntry logEntry = wholeEntry.getEntry();
child = (IN) logEntry.getResolvedItem(databaseImpl);
isMiss = true;
}
child.latchNoUpdateLRU(databaseImpl);
if (ohBytes != null) {
child.postLoadInit(this, idx);
} else {
child.postFetchInit(databaseImpl, lsn);
}
attachNode(idx, child, null);
child.releaseLatch();
} catch (FileNotFoundException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
makeFetchErrorMsg(null, lsn, idx), e);
} catch (ErasedException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
"IN is erased unexpectedly, implied corruption. " +
makeFetchErrorMsg(null, lsn, idx), e);
} catch (EnvironmentFailureException e) {
e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx));
throw e;
} catch (RuntimeException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
makeFetchErrorMsg(e.toString(), lsn, idx), e);
}
}
assert(hasCachedChildren() == hasCachedChildrenFlag());
child.incFetchStats(envImpl, isMiss);
return child;
}
/**
* Returns the idx-th child of "this" upper IN, loading the child from
* off-heap and attaching it to its parent if it is not already attached
* and is cached off-heap. This method does not fetch from disk, and will
* return null if the child is not in the main or off-heap cache.
*
* On entry, the parent must be EX-latched already and it stays EX-latched
* for the duration of this method and on return (even in case of
* exceptions).
*
* @param idx The slot of the child to fetch.
*
* @return null if the child BIN is not in the main or off-heap cache.
*/
public IN loadIN(int idx, CacheMode cacheMode) {
assert(isUpperIN());
if (!isLatchExclusiveOwner()) {
throw unexpectedState("EX-latch not held before load");
}
IN child = (IN) entryTargets.get(idx);
if (child != null) {
return child;
}
if (getNormalizedLevel() != 2) {
return null;
}
final EnvironmentImpl envImpl = getEnv();
final OffHeapCache ohCache = envImpl.getOffHeapCache();
final long lsn = getLsn(idx);
try {
final byte[] ohBytes = ohCache.getBINBytes(this, idx);
if (ohBytes == null) {
return null;
}
child = ohCache.materializeBIN(envImpl, ohBytes);
child.latchNoUpdateLRU(databaseImpl);
child.postLoadInit(this, idx);
attachNode(idx, child, null);
child.releaseLatch();
return child;
} catch (RuntimeException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
makeFetchErrorMsg(e.toString(), lsn, idx), e);
}
}
/**
* Returns the target of the idx'th entry, fetching from disk if necessary.
*
* Null is returned in the following cases:
*
* 1. if the LSN is null and the KnownDeleted flag is set; or
* 2. if the LSN's file has been cleaned and:
* a. the PendingDeleted or KnownDeleted flag is set, or
* b. the entry is "probably expired".
*
* Note that checking for PD/KD before calling this method is not
* sufficient to ensure that null is not returned, because null is also
* returned for expired records.
*
* When null is returned, the caller must treat the record as deleted.
*
* Note that null can only be returned for a slot that could contain an LN,
* not other node types and not a DupCountLN since DupCountLNs are never
* deleted or expired.
*
* An exclusive latch must be held on this BIN.
*
* @return the LN or null.
*/
public final LN fetchLN(int idx, CacheMode cacheMode) {
return (LN) fetchLN(idx, cacheMode, false);
}
/*
* This method may return either an LN or a DIN child of a BIN. It is meant
* to be used from DupConvert only.
*/
public final Node fetchLNOrDIN(int idx, CacheMode cacheMode) {
return fetchLN(idx, cacheMode, true);
}
/*
* Underlying implementation of the above fetchLNXXX methods.
*/
private Node fetchLN(int idx, CacheMode cacheMode, boolean dupConvert) {
assert(isBIN());
if (!isLatchExclusiveOwner()) {
throw unexpectedState("EX-latch not held before fetch");
}
if (isEntryKnownDeleted(idx)) {
return null;
}
final BIN bin = (BIN) this;
final EnvironmentImpl envImpl = getEnv();
final OffHeapCache ohCache = envImpl.getOffHeapCache();
boolean isMiss = false;
Node child = entryTargets.get(idx);
/* Fetch it from disk. */
if (child == null) {
final long lsn = getLsn(idx);
if (lsn == DbLsn.NULL_LSN) {
throw unexpectedState(makeFetchErrorMsg(
"NULL_LSN without KnownDeleted", lsn, idx));
}
/*
* Fetch of immediately obsolete LN not allowed. The only exception
* is during conversion of an old-style dups DB.
*/
if (isEmbeddedLN(idx) ||
(databaseImpl.isLNImmediatelyObsolete() && !dupConvert)) {
throw unexpectedState("May not fetch immediately obsolete LN");
}
try {
byte[] lnSlotKey = null;
child = ohCache.loadLN(bin, idx, cacheMode);
if (child == null) {
final WholeEntry wholeEntry = envImpl.getLogManager().
getLogEntryAllowInvisibleAtRecovery(
lsn, getLastLoggedSize(idx));
/* Last logged size is not present before log version 9. */
setLastLoggedSize(
idx, wholeEntry.getHeader().getEntrySize());
final LogEntry logEntry = wholeEntry.getEntry();
if (logEntry instanceof LNLogEntry) {
final LNLogEntry<?> lnEntry =
(LNLogEntry<?>) wholeEntry.getEntry();
lnEntry.postFetchInit(databaseImpl);
lnSlotKey = lnEntry.getKey();
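/*
* Unless the cache mode dictates otherwise (eviction modes,
* UNCHANGED and MAKE_COLD), treat this fetch as a hit and make
* the BIN hot by moving it to the priority-1 LRU.
*/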
if (cacheMode != CacheMode.EVICT_LN &&
cacheMode != CacheMode.EVICT_BIN &&
cacheMode != CacheMode.UNCHANGED &&
cacheMode != CacheMode.MAKE_COLD) {
getEvictor().moveToPri1LRU(this);
}
}
child = (Node) logEntry.getResolvedItem(databaseImpl);
isMiss = true;
}
child.postFetchInit(databaseImpl, lsn);
attachNode(idx, child, lnSlotKey);
} catch (FileNotFoundException e) {
/*
* Cleaner got to the log file. Do not consider this an
* integrity error if deleted, likely expired, EXTINCT or
* MAYBE_EXTINCT. It is safe to ignore a deleted file for a
* KD or PD entry because files with active txns will not be
* cleaned.
*/
if (!bin.isDeleted(idx) &&
!bin.isProbablyExpired(idx) &&
envImpl.getExtinctionState(
databaseImpl, bin.getKey(idx)) == NOT_EXTINCT) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
makeFetchErrorMsg(null, lsn, idx), e);
}
return null;
} catch (ErasedException e) {
/*
* Eraser got to the log file. Do not consider this an
* integrity error if EXTINCT or MAYBE_EXTINCT.
*/
if (envImpl.getExtinctionState(
databaseImpl, bin.getKey(idx)) == NOT_EXTINCT) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
"LN is erased unexpectedly, implied corruption. " +
makeFetchErrorMsg(null, lsn, idx), e);
}
return null;
} catch (EnvironmentFailureException e) {
e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx));
throw e;
} catch (RuntimeException e) {
throw new EnvironmentFailureException(
envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
makeFetchErrorMsg(e.toString(), lsn, idx), e);
}
}
if (child.isLN()) {
final LN ln = (LN) child;
if (cacheMode != CacheMode.UNCHANGED &&
cacheMode != CacheMode.MAKE_COLD) {
ln.setFetchedCold(false);
}
ohCache.freeRedundantLN(bin, idx, ln, cacheMode);
}
child.incFetchStats(envImpl, isMiss);
return child;
}
/**
* Return the idx'th LN target, enforcing rules defined by the cache modes
* for the LN. This method should be called instead of getTarget when a
* cache mode applies to user operations such as reads and updates.
*/
public final LN getLN(int idx, CacheMode cacheMode) {
assert isBIN();
final LN ln = (LN) entryTargets.get(idx);
if (ln == null) {
return null;
}
if (cacheMode != CacheMode.UNCHANGED &&
cacheMode != CacheMode.MAKE_COLD) {
ln.setFetchedCold(false);
}
final OffHeapCache ohCache = getOffHeapCache();
if (ohCache.isEnabled()) {
ohCache.freeRedundantLN((BIN) this, idx, ln, cacheMode);
}
return ln;
}
/**
* Initialize a node that has been read in from the log.
*/
@Override
public final void postFetchInit(DatabaseImpl db, long fetchedLsn) {
assert isLatchExclusiveOwner();
commonInit(db);
setLastLoggedLsn(fetchedLsn);
convertDupKeys(); // must be after initMemorySize
addToMainCache();
if (isBIN()) {
setFetchedCold(true);
}
/* See Database.mutateDeferredWriteBINDeltas. */
if (db.isDeferredWriteMode()) {
mutateToFullBIN(false);
}
}
/**
* Initialize a BIN loaded from off-heap cache.
*
* Does not call setLastLoggedLsn because materialization of the off-heap
* BIN initializes all fields including the last logged/delta LSNs.
*/
private void postLoadInit(IN parent, int idx) {
assert isLatchExclusiveOwner();
commonInit(parent.databaseImpl);
addToMainCache();
if (isBIN()) {
setFetchedCold(true);
setFetchedColdOffHeap(true);
}
getEnv().getOffHeapCache().postBINLoad(parent, idx, (BIN) this);
}
/**
* Initialize a node read in during recovery.
*/
public final void postRecoveryInit(DatabaseImpl db, long lastLoggedLsn) {
commonInit(db);
setLastLoggedLsn(lastLoggedLsn);
}
/**
* Common actions of postFetchInit, postLoadInit and postRecoveryInit.
*/
private void commonInit(DatabaseImpl db) {
setDatabase(db);
initMemorySize(); // compute before adding to IN list
}
/**
* Add to INList and perform eviction related initialization.
*/
private void addToMainCache() {
getEnv().getInMemoryINs().add(this);
if (!isDIN() && !isDBIN()) {
if (isUpperIN() && traceLRU) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
" postFetchInit(): " +
" Adding UIN to LRU: " + getNodeId());
}
getEvictor().addBack(this);
}
/* Compress full BINs after fetching or loading. */
if (!(this instanceof DBIN || this instanceof DIN)) {
getEnv().lazyCompress(this);
}
}
/**
* Needed only during duplicates conversion, not during normal operation.
* The needDupKeyConversion field will only be true when first upgrading
* from JE 4.1. After the first time an IN is converted, it will be
* written out in a later file format, so the needDupKeyConversion field
* will be false and this method will do nothing. See
* DupConvert.convertInKeys.
*/
private void convertDupKeys() {
/* Do not convert more than once. */
if (!needDupKeyConversion) {
return;
}
needDupKeyConversion = false;
DupConvert.convertInKeys(databaseImpl, this);
}
/**
* @see Node#incFetchStats
*/
@Override
final void incFetchStats(EnvironmentImpl envImpl, boolean isMiss) {
Evictor e = envImpl.getEvictor();
if (isBIN()) {
e.incBINFetchStats(isMiss, isBINDelta(false/*checkLatched*/));
} else {
e.incUINFetchStats(isMiss);
}
}
public String makeFetchErrorMsg(
final String msg,
final long lsn,
final int idx) {
final byte state = idx >= 0 ? entryStates[idx] : 0;
final long expirationTime;
if (isBIN() && idx >= 0) {
final BIN bin = (BIN) this;
expirationTime = TTL.expirationToSystemTime(
bin.getExpiration(idx), isExpirationInHours());
} else {
expirationTime = 0;
}
return makeFetchErrorMsg(msg, this, lsn, state, expirationTime);
}
/**
* @param in parent IN. Is null when root is fetched.
*/
static String makeFetchErrorMsg(
String msg,
IN in,
long lsn,
byte state,
long expirationTime) {
/*
* Bolster the exception with the LSN, which is critical for
* debugging. Otherwise, the exception propagates upward and loses the
* problem LSN.
*/
StringBuilder sb = new StringBuilder();
if (in == null) {
sb.append("fetchRoot of ");
} else if (in.isBIN()) {
sb.append("fetchLN of ");
} else {
sb.append("fetchIN of ");
}
if (lsn == DbLsn.NULL_LSN) {
sb.append("null lsn");
} else {
sb.append(DbLsn.getNoFormatString(lsn));
}
if (in != null) {
sb.append(" parent IN=").append(in.getNodeId());
sb.append(" IN class=").append(in.getClass().getName());
sb.append(" lastFullLsn=");
sb.append(DbLsn.getNoFormatString(in.getLastFullLsn()));
sb.append(" lastLoggedLsn=");
sb.append(DbLsn.getNoFormatString(in.getLastLoggedLsn()));
sb.append(" parent.getDirty()=").append(in.getDirty());
}
sb.append(" state=").append(state);
sb.append(" expires=");
if (expirationTime != 0) {
sb.append(TTL.formatExpirationTime(expirationTime));
} else {
sb.append("never");
}
if (msg != null) {
sb.append(" ").append(msg);
}
return sb.toString();
}
public final int findEntry(
byte[] key,
boolean indicateIfDuplicate,
boolean exact) {
return findEntry(key, indicateIfDuplicate, exact, null /*Comparator*/);
}
/**
* Find the entry in this IN for which key is LTE the key arg.
*
* Currently uses a binary search, but eventually, this may use binary or
* linear search depending on key size, number of entries, etc.
*
* This method guarantees that the key parameter, which is the user's key
* parameter in user-initiated search operations, is always the left hand
* parameter to the Comparator.compare method. This allows a comparator
* to perform specialized searches, when passed down from upper layers.
*
* This is public so that DbCursorTest can access it.
*
* Note that the 0'th entry's key is treated specially in an IN. It always
* compares lower than any other key.
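*
* For example (a sketch, assuming single-byte keys): in a BIN holding
* keys {b, d, f}, findEntry(d, false, false) returns 1, and
* findEntry(e, false, false) also returns 1 (the entry whose key is
* LTE the search key); findEntry(e, false, true) returns -1; and
* findEntry(d, true, false) returns 1 | EXACT_MATCH.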
*
* @param key - the key to search for.
* @param indicateIfDuplicate - true if EXACT_MATCH should
* be or'd onto the return value if key is already present in this node.
* @param exact - true if an exact match must be found.
* @return offset for the entry that has a key LTE the arg. 0 if key
* is less than the 1st entry. -1 if exact is true and no exact match
* is found. If indicateIfDuplicate is true and an exact match was found
* then EXACT_MATCH is or'd onto the return value.
*/
public final int findEntry(
byte[] key,
boolean indicateIfDuplicate,
boolean exact,
Comparator<byte[]> comparator) {
assert idKeyIsSlotKey();
int high = nEntries - 1;
int low = 0;
int middle = 0;
if (comparator == null) {
comparator = databaseImpl.getKeyComparator();
}
/*
* Special Treatment of 0th Entry
* ------------------------------
* IN's are special in that they have an entry[0] where the key is a
* virtual key in that it always compares lower than any other key.
* BIN's don't treat key[0] specially. But if the caller asked for an
* exact match or to indicate duplicates, then use the key[0] and
* forget about the special entry zero comparison.
*
* We always use inexact searching to get down to the BIN, and then
* call findEntry separately on the BIN if necessary. So the behavior
* of findEntry is different for BINs and INs, because it's used in
* different ways.
*
* Consider a tree where the lowest key is "b" and we want to insert
* "a". If we did the comparison (with exact == false), we wouldn't
* find the correct (i.e. the left) path down the tree. So the
* virtual key ensures that "a" gets inserted down the left path.
*
* The insertion case is a good specific example. findBinForInsert
* does inexact searching in the INs only, not the BIN.
*
* There's nothing special about the 0th key itself, only the use of
* the 0th key in the comparison algorithm.
*/
boolean entryZeroSpecialCompare =
isUpperIN() && !exact && !indicateIfDuplicate;
assert nEntries >= 0;
while (low <= high) {
middle = (high + low) / 2;
int s;
if (middle == 0 && entryZeroSpecialCompare) {
s = 1;
} else {
s = entryKeys.compareKeys(
key, keyPrefix, middle,
haveEmbeddedData(middle), comparator);
}
if (s < 0) {
high = middle - 1;
} else if (s > 0) {
low = middle + 1;
} else {
int ret;
if (indicateIfDuplicate) {
ret = middle | EXACT_MATCH;
} else {
ret = middle;
}
if ((ret >= 0) && exact && isEntryKnownDeleted(ret & 0xffff)) {
return -1;
} else {
return ret;
}
}
}
/*
* No match found. Either return -1 if caller wanted exact matches
* only, or return entry whose key is < search key.
*/
if (exact) {
return -1;
} else {
return high;
}
}
/**
* Inserts a slot with the given key, lsn and child node into this IN, if
* a slot with the same key does not exist already. The state of the new
* slot is set to DIRTY. Assumes this node is already latched by the
* caller.
*
* @return true if the entry was successfully inserted, false
* if it was a duplicate.
*
* @throws EnvironmentFailureException if the node is full
* (it should have been split earlier).
*/
public final boolean insertEntry(
Node child,
byte[] key,
long childLsn)
throws DatabaseException {
assert(!isBINDelta());
int res = insertEntry1(
child, key, null, childLsn, EntryStates.DIRTY_BIT, false);
return (res & INSERT_SUCCESS) != 0;
}
/**
* Inserts a slot with the given key, lsn and child node into this IN, if
* a slot with the same key does not exist already. The state of the new
* slot is set to DIRTY. Assumes this node is already latched by the
* caller.
*
* @param data If the data portion of a record must be embedded in this
* BIN, "data" stores the record's data. Null otherwise. See also comment
* for the keyEntries field.
*
* @return either (1) the index of location in the IN where the entry was
* inserted |'d with INSERT_SUCCESS, or (2) the index of the duplicate in
* the IN if the entry was found to be a duplicate.
*
* @throws EnvironmentFailureException if the node is full (it should have
* been split earlier).
*/
public final int insertEntry1(
Node child,
byte[] key,
byte[] data,
long childLsn,
boolean blindInsertion) {
return insertEntry1(
child, key, data, childLsn, EntryStates.DIRTY_BIT,
blindInsertion);
}
/**
* Inserts a slot with the given key, lsn, state, and child node into this
* IN, if a slot with the same key does not exist already. Assumes this
* node is already latched by the caller.
*
* This returns a failure if there's a duplicate match. The caller must do
* the processing to check if the entry is actually deleted and can be
* overwritten. This is foisted upon the caller rather than handled in this
* object because there may be some latch releasing/retaking in order to
* check a child LN.
*
* @param data If the data portion of a record must be embedded in this
* BIN, "data" stores the record's data. Null otherwise. See also comment
* for the keyEntries field.
*
* @return either (1) the index of location in the IN where the entry was
* inserted |'d with INSERT_SUCCESS, or (2) the index of the duplicate in
* the IN if the entry was found to be a duplicate.
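*
* A caller can decode the result as follows (a sketch):
*
*   int res = in.insertEntry1(child, key, null, lsn, state, false);
*   if ((res & INSERT_SUCCESS) != 0) {
*       int idx = res & ~INSERT_SUCCESS; // inserted at idx
*   } else {
*       int dupIdx = res; // key already present at dupIdx
*   }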
*
* @throws EnvironmentFailureException if the node is full (it should have
* been split earlier).
*/
public final int insertEntry1(
Node child,
byte[] key,
byte[] data,
long childLsn,
byte state,
boolean blindInsertion) {
/*
* Search without requiring an exact match, but do let us know the
* index of the match if there is one.
*/
int index = findEntry(key, true, false);
if (index >= 0 && (index & EXACT_MATCH) != 0) {
/*
* There is an exact match. Don't insert; let the caller decide
* what to do with this duplicate.
*/
return index & ~IN.EXACT_MATCH;
}
/*
* There was no key match, but if this is a bin delta, there may be an
* exact match in the full bin. Mutate to full bin and search again.
* However, if we know for sure that the key does not exist in the full
* BIN, then don't mutate, unless there is no space in the delta to do
* the insertion.
*/
if (isBINDelta()) {
BIN bin = (BIN)this;
boolean doBlindInsertion = (nEntries < getMaxEntries());
if (doBlindInsertion &&
!blindInsertion &&
bin.mayHaveKeyInFullBin(key)) {
doBlindInsertion = false;
}
if (!doBlindInsertion) {
mutateToFullBIN(true /*leaveFreeSlot*/);
index = findEntry(key, true, false);
if (index >= 0 && (index & EXACT_MATCH) != 0) {
return index & ~IN.EXACT_MATCH;
}
} else {
getEvictor().incBinDeltaBlindOps();
if (traceDeltas) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
(blindInsertion ?
" Blind insertion in BIN-delta " :
" Blind put in BIN-delta ") +
getNodeId() + " nEntries = " +
nEntries + " max entries = " +
getMaxEntries() +
" full BIN entries = " +
bin.getFullBinNEntries() +
" full BIN max entries = " +
bin.getFullBinMaxEntries());
}
}
}
if (nEntries >= getMaxEntries()) {
throw unexpectedState(
getEnv(),
"Node " + getNodeId() +
" should have been split before calling insertEntry" +
" is BIN-delta: " + isBINDelta() +
" num entries: " + nEntries +
" max entries: " + getMaxEntries());
}
/* There was no key match, so insert to the right of this entry. */
index++;
/* We found a spot for insert, shift entries as needed. */
if (index < nEntries) {
int oldSize = computeLsnOverhead();
/* Adding elements to the LSN array can change the space used. */
shiftEntriesRight(index);
updateMemorySize(computeLsnOverhead() - oldSize);
} else {
nEntries++;
}
if (isBINDelta()) {
((BIN)this).incFullBinNEntries();
}
int oldSize = computeLsnOverhead();
if (data == null || databaseImpl.isDeferredWriteMode()) {
setTarget(index, child);
}
setLsnInternal(index, childLsn);
boolean multiSlotChange = insertKey(index, key, data);
/*
* Do this after calling insertKey() to overwrite whatever state
* changes were made by that call.
*/
entryStates[index] = state;
if (data != null) {
setEmbeddedLN(index);
if (data.length == 0) {
setNoDataLN(index);
}
}
adjustCursorsForInsert(index);
updateMemorySize(oldSize,
getEntryInMemorySize(index) +
computeLsnOverhead());
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
}
setDirty(true);
assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
return (index | INSERT_SUCCESS);
}
/**
* Removes the slot at index from this IN. Assumes this node is already
* latched by the caller.
*
* @param index The index of the entry to delete from the IN.
*/
public void deleteEntry(int index) {
deleteEntry(index, true /*makeDirty*/, true /*validate*/);
}
/**
* Variant that allows specifying whether the IN is dirtied and whether
* validation takes place. 'validate' should be false only in tests.
*
* See BIN.compress and INCompressor for a discussion about why slots can
* be deleted without dirtying the BIN, and why the next delta is
* prohibited when the slot is dirty.
*/
void deleteEntry(int index, boolean makeDirty, boolean validate) {
assert !isBINDelta();
assert index >= 0 && index < nEntries;
assert !validate || validateSubtreeBeforeDelete(index);
if (makeDirty) {
setDirty(true);
}
if (isDirty(index)) {
setProhibitNextDelta(true);
}
Node child = getTarget(index);
final OffHeapCache ohCache = getEnv().getOffHeapCache();
final int level = getNormalizedLevel();
if (level == 1) {
ohCache.freeLN((BIN) this, index);
} else if (level == 2) {
ohCache.freeBIN((BIN) child, this, index);
}
if (child != null && child.isIN()) {
IN childIN = (IN)child;
getEnv().getInMemoryINs().remove(childIN);
}
updateMemorySize(getEntryInMemorySize(index), 0);
int oldLSNArraySize = computeLsnOverhead();
/*
* Do the actual deletion. Note: setTarget() must be called before
* copyEntries() so that the hasCachedChildrenFlag will be properly
* maintained.
*/
setTarget(index, null);
copyEntries(index + 1, index, nEntries - index - 1);
nEntries--;
/* cleanup what used to be the last entry */
clearEntry(nEntries);
/* setLsnInternal can mutate to an array of longs. */
updateMemorySize(oldLSNArraySize, computeLsnOverhead());
assert(isBIN() || hasCachedChildrenFlag() == hasCachedChildren());
/*
* Note that we don't have to adjust cursors for delete, since
* there should be nothing pointing at this record.
*/
traceDelete(Level.FINEST, index);
}
/**
* WARNING: clearEntry() calls entryTargets.set() directly, instead of
* setTarget(). As a result, the hasCachedChildren flag of the IN is not
* updated here. The caller is responsible for updating this flag, if
* needed.
*/
void clearEntry(int idx) {
entryTargets = entryTargets.set(idx, null, this);
entryKeys = entryKeys.set(idx, null, this);
offHeapBINIds = offHeapBINIds.set(idx, 0, this);
setLsnInternal(idx, DbLsn.NULL_LSN);
entryStates[idx] = 0;
}
/**
* This method is called after the idx'th child of this node gets logged,
* and its position in the log (LSN) changes as a result.
*
* @param newLSN The new on-disk position of the child.
*
* @param newVLSN The VLSN of the logrec at the new position.
* For LN children only.
*
* @param newSize The size of the logrec at the new position.
* For LN children only.
*/
public final void updateEntry(
int idx,
long newLSN,
long newVLSN,
int newSize) {
setLsn(idx, newLSN);
if (isBIN()) {
if (isEmbeddedLN(idx)) {
((BIN)this).setCachedVLSN(idx, newVLSN);
} else {
setLastLoggedSize(idx, newSize);
}
}
setDirty(true);
}
/**
* This method is called only from BIN.applyDelta(). It applies the info
* extracted from a delta slot to the corresponding slot in the full BIN.
*
* Unlike other update methods, the LSN may be NULL_LSN if the KD flag is
* set. This allows applying a BIN-delta with a NULL_LSN and KD, for an
* invisible log entry for example.
*
* No need to do memory counting in this method because the BIN is not
* yet attached to the tree.
*/
final void applyDeltaSlot(
int idx,
Node node,
long lsn,
int lastLoggedSize,
byte state,
byte[] key,
byte[] data) {
assert(isBIN());
assert(!isBINDelta());
assert(lsn != DbLsn.NULL_LSN ||
(state & EntryStates.KNOWN_DELETED_BIT) != 0);
assert(node == null || data == null);
assert(!getInListResident());
((BIN) this).freeOffHeapLN(idx);
setLsn(idx, lsn, false/*check*/);
setLastLoggedSize(idx, lastLoggedSize);
setTarget(idx, node);
updateLNSlotKey(idx, key, data);
assert(isEmbeddedLN(idx) == isEmbeddedLN(state));
assert(isNoDataLN(idx) == isNoDataLN(state));
entryStates[idx] = state;
setDirty(true);
}
/**
* Update the idx slot of this BIN to reflect a record insertion in an
* existing KD slot. It is called from CursorImpl.insertRecordInternal(),
* after logging the insertion op.
*
* @param newLN The LN associated with the new record.
*
* @param newLSN The LSN of the insertion logrec.
*
* @param newSize The size of the insertion logrec.
*
* @param newKey The value for the record's key. It is equal to the current
* key value in the slot, but may not be identical to that value if a
* custom comparator is used.
*
* @param newData If the record's data must be embedded in this BIN, "data"
* stores the record's data. Null otherwise. See also comment for the
* keyEntries field.
*/
public final void insertRecord(
int idx,
LN newLN,
long newLSN,
int newSize,
byte[] newKey,
byte[] newData,
int expiration,
boolean expirationInHours) {
assert(isBIN());
final BIN bin = (BIN) this;
bin.freeOffHeapLN(idx); // old version of the LN is stale
long oldSlotSize = getEntryInMemorySize(idx);
setLsn(idx, newLSN);
boolean multiSlotChange = updateLNSlotKey(idx, newKey, newData);
if (isEmbeddedLN(idx)) {
clearLastLoggedSize(idx);
bin.setCachedVLSN(idx, newLN.getVLSNSequence());
if (databaseImpl.isDeferredWriteMode()) {
setTarget(idx, newLN);
}
} else {
setTarget(idx, newLN);
setLastLoggedSize(idx, newSize);
}
bin.setExpiration(idx, expiration, expirationInHours);
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
}
clearKnownDeleted(idx);
clearPendingDeleted(idx);
setDirty(true);
assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
}
/**
* Update the idx slot of this BIN to reflect an update of the associated
* record. It is called from CursorImpl.updateRecordInternal(), after
* logging the update op.
*
* @param oldMemSize If the child LN was cached before the update op, it has
* already been updated in-place by the caller. In this case, oldMemSize
* stores the size of the child LN before the update, and it is used to do
* memory counting. Otherwise oldMemSize is 0 and the newly created LN has
* not been attached to the tree; it will be attached later by the caller,
* if needed.
*
* @param newLSN The LSN of the update logrec.
*
* @param newVLSN The VLSN of the update logrec.
*
* @param newSize The on-disk size of the update logrec.
*
* @param newKey The new value for the record's key. It is equal to the
* current value, but may not be identical to the current value if a
* custom comparator is used. It may be null, if the caller knows for
* sure that the key does not change.
*
* @param newData If the record's data must be embedded in this BIN, "data"
* stores the record's data. Null otherwise. See also comment for the
* keyEntries field.
*/
public final void updateRecord(
int idx,
long oldMemSize,
long newLSN,
long newVLSN,
int newSize,
byte[] newKey,
byte[] newData,
int expiration,
boolean expirationInHours) {
assert(isBIN());
final BIN bin = (BIN) this;
bin.freeOffHeapLN(idx); // old version of the LN is stale
long oldSlotSize = getEntryInMemorySize(idx);
setLsn(idx, newLSN);
boolean multiSlotChange = updateLNSlotKey(idx, newKey, newData);
if (isEmbeddedLN(idx)) {
clearLastLoggedSize(idx);
((BIN)this).setCachedVLSN(idx, newVLSN);
} else {
setLastLoggedSize(idx, newSize);
}
bin.setExpiration(idx, expiration, expirationInHours);
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
/* Update mem size for key change. */
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
/* Update mem size for node change. */
Node newLN = entryTargets.get(idx);
long newMemSize =
(newLN != null ? newLN.getMemorySizeIncludedByParent() : 0);
updateMemorySize(oldMemSize, newMemSize);
}
setDirty(true);
}
/**
* Update the idx slot of this BIN to reflect a deletion of the
* associated record. It is called from CursorImpl.deleteCurrentRecord(),
* after logging the deletion op.
*
* @param oldMemSize If the child LN was cached before the deletion, it
* has already been updated in-place by the caller (the ln contents have
* been deleted). In this case, oldMemSize stores the in-memory size of
* the child LN before the update, and it is used to do memory counting.
* Otherwise oldMemSize is 0 and the newly created LN has not been attached
* to the tree; it will be attached later by the caller, if needed.
*
* @param newLSN The LSN of the deletion logrec.
*
* @param newVLSN The VLSN of the deletion logrec.
*
* @param newSize The on-disk size of the deletion logrec.
*/
public final void deleteRecord(
int idx,
long oldMemSize,
long newLSN,
long newVLSN,
int newSize) {
assert(isBIN());
final BIN bin = (BIN) this;
bin.freeOffHeapLN(idx); // old version of the LN is stale
setLsn(idx, newLSN);
if (isEmbeddedLN(idx)) {
clearLastLoggedSize(idx);
bin.setCachedVLSN(idx, newVLSN);
} else {
setLastLoggedSize(idx, newSize);
}
if (entryTargets.get(idx) != null) {
/* Update mem size for node change. */
assert(oldMemSize != 0);
Node newLN = entryTargets.get(idx);
long newMemSize = newLN.getMemorySizeIncludedByParent();
updateMemorySize(oldMemSize, newMemSize);
} else {
assert(oldMemSize == 0);
}
setPendingDeleted(idx);
setDirty(true);
}
/**
* This method is used by the RecoveryManager to change the current version
* of a record, either to a later version (in case of redo), or to an
* earlier version (in case of undo). The current version may or may not be
* cached as a child LN of this BIN (it may be only in case of txn abort
* during normal processing). If it is, it is evicted. The new version is
* not attached to the in-memory tree, to save memory during crash
* recovery.
*
* @param idx The BIN slot for the record.
*
* @param lsn The LSN of the new record version. It may be null in case of
* undo, if the logrec that is being undone is an insertion and the record
* did not exist at all in the DB before that insertion.
*
* @param knownDeleted True if the new version is a committed deletion.
*
* @param pendingDeleted True if the new version is a deletion, which
* may or may not be committed.
*
* @param key The key of the new version. It is null only if we are undoing
* and the revert-to version was not embedded (in this case the key of the
* revert-to version is not stored in the logrec). If it is null and the
* DB allows key updates, the new record version is fetched from disk to
* retrieve its key, so that the key values stored in the BIN slots are
* always transactionally correct.
*
* @param data The data of the new version. It is non-null if and only if
* the new version must be embedded in the BIN.
*
* @param vlsn The VLSN of the new version.
*
* @param logrecSize The on-disk size of the logrec corresponding to the
* new version. It may be 0 (i.e. unknown) in case of undo.
*/
public final void recoverRecord(
int idx,
long lsn,
boolean knownDeleted,
boolean pendingDeleted,
byte[] key,
byte[] data,
long vlsn,
int logrecSize,
int expiration,
boolean expirationInHours) {
assert(isBIN());
BIN bin = (BIN) this;
bin.freeOffHeapLN(idx); // old version of the LN is stale
if (lsn == DbLsn.NULL_LSN) {
/*
* A NULL lsn means that we are undoing an insertion that was done
* without slot reuse. To undo such an insertion we evict the
* current version (it may be cached only in case of normal txn abort)
* and set the KD flag in the slot. We also set the LSN to null to
* ensure that the slot does not point to a logrec that does not
* reflect the slot's current state. The slot can then be put on
* the compressor for complete removal.
*/
setKnownDeletedAndEvictLN(idx);
setLsnInternal(idx, DbLsn.NULL_LSN);
bin.queueSlotDeletion(idx);
return;
}
if (key == null &&
databaseImpl.allowsKeyUpdates() &&
!knownDeleted) {
try {
WholeEntry wholeEntry = getEnv().getLogManager().
getLogEntryAllowInvisibleAtRecovery(
lsn, getLastLoggedSize(idx));
LNLogEntry<?> logrec = (LNLogEntry<?>) wholeEntry.getEntry();
logrec.postFetchInit(getDatabase());
key = logrec.getKey();
logrecSize = wholeEntry.getHeader().getEntrySize();
} catch (FileNotFoundException e) {
throw new EnvironmentFailureException(
getEnv(), EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
makeFetchErrorMsg(null, lsn, idx), e);
} catch (ErasedException e) {
throw new EnvironmentFailureException(
getEnv(), EnvironmentFailureReason.LOG_CHECKSUM,
"LN is erased unexpectedly, implied corruption. " +
makeFetchErrorMsg(null, lsn, idx), e);
}
}
long oldSlotSize = getEntryInMemorySize(idx);
setLsn(idx, lsn);
setTarget(idx, null);
boolean multiSlotChange = updateLNSlotKey(idx, key, data);
if (isEmbeddedLN(idx)) {
clearLastLoggedSize(idx);
bin.setCachedVLSN(idx, vlsn);
} else {
setLastLoggedSize(idx, logrecSize);
}
if (knownDeleted) {
assert(!pendingDeleted);
setKnownDeleted(idx);
bin.queueSlotDeletion(idx);
} else {
clearKnownDeleted(idx);
if (pendingDeleted) {
setPendingDeleted(idx);
bin.queueSlotDeletion(idx);
} else {
clearPendingDeleted(idx);
}
}
bin.setExpiration(idx, expiration, expirationInHours);
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
}
setDirty(true);
}
/**
* Update the cached-child and LSN properties of the idx-th slot. This
* method is used by RecoveryManager.recoverChildIN() to change the
* version of a child IN to a later version. The child IN may or may not
* be already attached to the tree.
*/
public final void recoverIN(
int idx,
Node node,
long lsn,
int lastLoggedSize) {
long oldSlotSize = getEntryInMemorySize(idx);
/*
* If we are about to detach a cached child IN, make sure that it is
* not in the INList. This is correct, because this method is called
* during the recovery phase, where the INList is disabled.
*/
Node child = getTarget(idx);
assert(child == null ||
!((IN)child).getInListResident() ||
child == node/* this is needed by a unit test*/);
setLsn(idx, lsn);
setLastLoggedSize(idx, lastLoggedSize);
setTarget(idx, node);
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
setDirty(true);
assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
}
/**
* Attach the given node as the idx-th child of "this" node. If the child
* node is an LN, update the key of the parent slot to the given key value,
* if that value is non-null and an update is indeed necessary.
*
* This method is called after the child node has been either (a) fetched
* in from disk and is not dirty, or (b) is a newly created instance that
* will be written out later by something like a checkpoint. In either
* case, the slot LSN does not need to be updated.
*
* Note: does not dirty the node unless the LN slot key is changed.
*/
public final void attachNode(int idx, Node node, byte[] newKey) {
assert !(node instanceof IN) || ((IN) node).isLatchExclusiveOwner();
long oldSlotSize = getEntryInMemorySize(idx);
/* Make sure we are not using this method to detach a cached child */
assert(getTarget(idx) == null);
setTarget(idx, node);
boolean multiSlotChange = false;
if (isBIN() && newKey != null) {
assert(!haveEmbeddedData(idx));
multiSlotChange = updateLNSlotKey(idx, newKey, null);
}
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
}
assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
}
/*
* Detach from the tree the child node at the idx-th slot.
*
* The most common caller of this method is the evictor. If the child
* being evicted was dirty, it has just been logged and the lsn of the
* slot must be updated.
*/
public final void detachNode(int idx, boolean updateLsn, long newLsn) {
long oldSlotSize = getEntryInMemorySize(idx);
Node child = getTarget(idx);
if (updateLsn) {
setLsn(idx, newLsn);
setDirty(true);
}
setTarget(idx, null);
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
if (child != null && child.isIN()) {
getEnv().getInMemoryINs().remove((IN) child);
}
assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
}
/**
* This method is used in DupConvert, where it is called to convert the
* keys of an upper IN that has just been fetched from the log and is not
* attached to the in-memory tree yet.
*/
public final void convertKey(int idx, byte[] newKey) {
long oldSlotSize = getEntryInMemorySize(idx);
boolean multiSlotChange = updateKey(idx, newKey, null);
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
long newSlotSize = getEntryInMemorySize(idx);
updateMemorySize(oldSlotSize, newSlotSize);
}
setDirty(true);
assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
}
void copyEntries(final int from, final int to, final int n) {
entryTargets = entryTargets.copy(from, to, n, this);
entryKeys = entryKeys.copy(from, to, n, this);
offHeapBINIds = offHeapBINIds.copy(from, to, n, this);
System.arraycopy(entryStates, from, entryStates, to, n);
if (entryLsnLongArray == null) {
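/*
* The compact representation stores each LSN in 4 bytes, hence
* the << 2 when converting slot indexes to byte offsets.
*/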
final int fromOff = from << 2;
final int toOff = to << 2;
final int nBytes = n << 2;
System.arraycopy(entryLsnByteArray, fromOff,
entryLsnByteArray, toOff, nBytes);
} else {
System.arraycopy(entryLsnLongArray, from,
entryLsnLongArray, to,
n);
}
}
/**
* Return true if this node needs splitting. For the moment, needing to be
* split is defined by there being no free entries available.
*/
public final boolean needsSplitting() {
if (isBINDelta()) {
BIN bin = (BIN)this;
int fullBinNEntries = bin.getFullBinNEntries();
int fullBinMaxEntries = bin.getFullBinMaxEntries();
if (fullBinNEntries < 0) {
/* fullBinNEntries is unknown in logVersions < 10 */
mutateToFullBIN(false /*leaveFreeSlot*/);
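/* After mutation, fall through to the full-BIN check below. */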
} else {
assert(fullBinNEntries > 0);
return ((fullBinMaxEntries - fullBinNEntries) < 1);
}
}
return ((getMaxEntries() - nEntries) < 1);
}
/**
* Split this into two nodes. Parent IN is passed in parent and should be
* latched by the caller.
*
* childIndex is the index in parent of where "this" can be found.
*/
public final IN split(
IN parent,
int childIndex,
IN grandParent,
int maxEntries) {
return splitInternal(parent, childIndex, grandParent, maxEntries, -1);
}
/**
* Called when we know we are about to split on behalf of a key that is the
* minimum (leftSide) or maximum (!leftSide) of this node. This is
* achieved by just forcing the split to occur either one element in from
* the left or the right (i.e. splitIndex is 1 or nEntries - 1).
*/
IN splitSpecial(
IN parent,
int parentIndex,
IN grandParent,
int maxEntriesPerNode,
byte[] key,
boolean leftSide) {
int index = findEntry(key, false, false);
if (leftSide && index == 0) {
return splitInternal(
parent, parentIndex, grandParent, maxEntriesPerNode, 1);
} else if (!leftSide && index == (nEntries - 1)) {
return splitInternal(
parent, parentIndex, grandParent, maxEntriesPerNode,
nEntries - 1);
} else {
return split(
parent, parentIndex, grandParent, maxEntriesPerNode);
}
}
final IN splitInternal(
final IN parent,
final int childIndex,
final IN grandParent,
final int maxEntries,
int splitIndex)
throws DatabaseException {
assert(!isBINDelta());
/*
* Find the index of the existing identifierKey so we know which IN
* (new or old) to put it in.
*/
if (identifierKey == null) {
throw unexpectedState();
}
final int idKeyIndex = findEntry(identifierKey, false, false);
if (splitIndex < 0) {
splitIndex = nEntries / 2;
}
/* Range of entries to copy to new sibling. */
final int low, high;
if (idKeyIndex < splitIndex) {
/*
* Current node (this) keeps left half entries. Right half entries
* will go in the new node.
*/
low = splitIndex;
high = nEntries;
} else {
/*
* Current node (this) keeps right half entries. Left half entries
* will go in the new node.
*/
low = 0;
high = splitIndex;
}
final byte[] newIdKey = getKey(low);
long parentLsn;
/*
* Ensure that max entries is large enough to hold the slots being
* moved to the new sibling, with one spare slot for insertions. This
* is important when the maxEntries param is less than nEntries in this
* node, which can occur when the user reduces the fanout or when this
* node has temporarily grown beyond its original fanout.
*/
final IN newSibling = createNewInstance(
newIdKey,
Math.max(maxEntries, high - low + 1),
level);
newSibling.latch(CacheMode.UNCHANGED);
try {
boolean addedNewSiblingToCompressorQueue = false;
final int newSiblingNEntries = (high - low);
final boolean haveCachedChildren = hasCachedChildrenFlag();
assert(isBIN() || haveCachedChildren == hasCachedChildren());
final BIN bin = isBIN() ? (BIN) this : null;
/*
* Distribute entries among the split node and the new sibling.
*/
for (int i = low; i < high; i++) {
if (!addedNewSiblingToCompressorQueue &&
bin != null &&
bin.isDefunct(i)) {
addedNewSiblingToCompressorQueue = true;
getEnv().addToCompressorQueue((BIN) newSibling);
}
newSibling.appendEntryFromOtherNode(this, i);
clearEntry(i);
}
if (low == 0) {
shiftEntriesLeft(newSiblingNEntries);
}
nEntries -= newSiblingNEntries;
setDirty(true);
if (isUpperIN() && haveCachedChildren) {
setHasCachedChildrenFlag(hasCachedChildren());
}
assert(isBIN() ||
hasCachedChildrenFlag() == hasCachedChildren());
assert(isBIN() ||
newSibling.hasCachedChildrenFlag() ==
newSibling.hasCachedChildren());
adjustCursors(newSibling, low, high);
/*
* If this node has no key prefix, calculate it now that it has
* been split. This must be done before logging, to ensure the
* prefix information is made persistent [#20799].
*/
byte[] newKeyPrefix = computeKeyPrefix(-1);
recalcSuffixes(newKeyPrefix, null, null, -1);
/* Apply compaction after prefixing [#20799]. */
entryKeys = entryKeys.compact(this);
/* Only recalc if there are multiple entries in newSibling. */
if (newSibling.getNEntries() > 1) {
byte[] newSiblingPrefix = newSibling.computeKeyPrefix(-1);
newSibling.recalcSuffixes(newSiblingPrefix, null, null, -1);
/* initMemorySize calls entryKeys.compact. */
newSibling.initMemorySize();
}
assert idKeyIsSlotKey();
assert newSibling.idKeyIsSlotKey();
/*
* Update size. newSibling and parent are correct, but this IN has
* had its entries shifted and is not correct.
*
* Also, inMemorySize does not reflect changes resulting from key
* prefixing, so it must be brought up to date for this reason as
* well.
*/
EnvironmentImpl env = getEnv();
INList inMemoryINs = env.getInMemoryINs();
long oldMemorySize = inMemorySize;
long newSize = computeMemorySize();
updateMemorySize(oldMemorySize, newSize);
/*
* Parent refers to child through an element of the entries array.
* Depending on which half of the BIN we copied keys from, we
* either have to adjust one pointer and add a new one, or we have
* to just add a new pointer to the new sibling.
*
* We must use the provisional logging for two reasons:
*
* 1) All three log entries must be read atomically. The parent
* must get logged last, as all referred-to children must precede
* it. Provisional entries guarantee that all three are processed
* as a unit. Recovery skips provisional entries, so the changed
* children are only used if the parent makes it out to the log.
*
* 2) We log all the way to the root to avoid the "great aunt"
* problem (see RecoveryManager.buildINs), and provisional
* logging is necessary during a checkpoint for levels less than
* maxFlushLevel.
*
* We prohibit compression during logging because there should be
* at least one entry in each IN. Note the use of getKey(0) below.
*/
long newSiblingLsn =
newSibling.optionalLogProvisionalNoCompress(parent);
long myNewLsn = optionalLogProvisionalNoCompress(parent);
assert nEntries > 0;
/*
* When we update the parent entry, we make sure that we don't
* replace the parent's key that points at 'this' with a key that
* is > than the existing one. Replacing the parent's key with
* something > would effectively render a piece of the subtree
* inaccessible. So only replace the parent key with something
* <= the existing one. See tree/SplitTest.java for more details
* on the scenario.
*/
if (low == 0) {
/*
* Change the original entry to point to the new child and add
* an entry to point to the newly logged version of this
* existing child.
*/
parent.prepareForSlotReuse(childIndex);
parent.updateSplitSlot(
childIndex, newSibling, newSiblingLsn, newIdKey);
boolean inserted = parent.insertEntry(
this, getKey(0), myNewLsn);
assert inserted;
} else {
/*
* Update the existing child's LSN to reflect the newly logged
* version and insert new child into parent.
*/
parent.updateSplitSlot(childIndex, this, myNewLsn, getKey(0));
boolean inserted = parent.insertEntry(
newSibling, newIdKey, newSiblingLsn);
assert inserted;
}
inMemoryINs.add(newSibling);
/*
* Log the parent. Note that the root slot or grandparent slot is
* not updated with the parent's LSN here; this is done by
* Tree.forceSplit.
*/
if (parent.isRoot()) {
parentLsn = parent.optionalLog();
} else {
parentLsn = parent.optionalLogProvisional(grandParent);
}
/* Coordinate the split with an in-progress checkpoint. */
env.getCheckpointer().coordinateSplitWithCheckpoint(newSibling);
/*
* Check whether either the old or the new sibling must be added
* to the LRU (priority-1 LRUSet).
*/
assert(!isDIN() && !isDBIN());
if(isBIN() || !newSibling.hasCachedChildrenFlag()) {
if (isUpperIN() && traceLRU) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
"split-newSibling " +
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
" Adding UIN to LRU: " +
newSibling.getNodeId());
}
getEvictor().addBack(newSibling);
}
if (isUpperIN() &&
haveCachedChildren &&
!hasCachedChildrenFlag()) {
if (traceLRU) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
"split-oldSibling " +
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
" Adding UIN to LRU: " + getNodeId());
}
getEvictor().addBack(this);
}
/* Debug log this information. */
traceSplit(Level.FINE, parent,
newSibling, parentLsn, myNewLsn,
newSiblingLsn, splitIndex, idKeyIndex, childIndex);
} finally {
newSibling.releaseLatch();
}
return newSibling;
}
/**
* Used for moving entries between nodes during splits.
*/
void appendEntryFromOtherNode(IN from, int fromIdx) {
assert(!isBINDelta());
final Node target = from.entryTargets.get(fromIdx);
final int ohBinId = from.getOffHeapBINId(fromIdx);
final boolean ohBinPri2 = from.isOffHeapBINPri2(fromIdx);
final boolean ohBinDirty = from.isOffHeapBINDirty(fromIdx);
final long lsn = from.getLsn(fromIdx);
final byte state = from.entryStates[fromIdx];
final byte[] key = from.getKey(fromIdx);
final byte[] data = (from.haveEmbeddedData(fromIdx) ?
from.getData(fromIdx) : null);
long oldSize = computeLsnOverhead();
++nEntries;
int idx = nEntries - 1;
/*
* When calling setTarget for an IN child we must latch it, because
* setTarget sets the parent.
*/
if (target != null && target.isIN()) {
final IN in = (IN) target;
in.latchNoUpdateLRU(databaseImpl);
setTarget(idx, target);
in.releaseLatch();
} else {
setTarget(idx, target);
}
boolean multiSlotChange = insertKey(idx, key, data);
/* setLsnInternal can mutate to an array of longs. */
setLsnInternal(idx, lsn);
entryStates[idx] = state;
if (ohBinId >= 0) {
setOffHeapBINId(idx, ohBinId, ohBinPri2, ohBinDirty);
getOffHeapCache().setOwner(ohBinId, this);
}
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
long newSize = getEntryInMemorySize(idx) + computeLsnOverhead();
updateMemorySize(oldSize, newSize);
}
setDirty(true);
}
/**
* Update a slot that is being split. The slot to be updated here is the
* one that existed before the split.
*
* @param child The new child to be placed under the slot. May be the
* newly created sibling or the pre-existing sibling.
* @param lsn The new lsn of the child (the child was logged just before
* calling this method, so its slot lsn must be updated)
* @param key The new key for the slot. We should not actually update the
* slot key, because its value is the lower bound of the key range covered
* by the slot, and this lower bound does not change as a result of the
* split (the new slot created as a result of the split is placed to the
* right of the pre-existing slot). There is however one exception: the
* key can be updated if "idx" is the 0-slot. The 0-slot key is not a true
* lower bound; the actual lower bound for the 0-slot is the key in the
* parent slot for this IN. So, in this case, if the given key is less
* than the current one, it is better to update the key in order to better
* approximate the real lower bound (and thus make the isKeyInBounds()
* method more effective).
*/
private void updateSplitSlot(
int idx,
IN child,
long lsn,
byte[] key) {
assert(isUpperIN());
long oldSize = getEntryInMemorySize(idx);
setLsn(idx, lsn);
setTarget(idx, child);
if (idx == 0) {
int s = entryKeys.compareKeys(
key, keyPrefix, idx, haveEmbeddedData(idx),
getKeyComparator());
boolean multiSlotChange = false;
if (s < 0) {
multiSlotChange = updateKey(idx, key, null/*data*/);
}
if (multiSlotChange) {
updateMemorySize(inMemorySize, computeMemorySize());
} else {
long newSize = getEntryInMemorySize(idx);
updateMemorySize(oldSize, newSize);
}
} else {
long newSize = getEntryInMemorySize(idx);
updateMemorySize(oldSize, newSize);
}
setDirty(true);
assert(hasCachedChildren() == hasCachedChildrenFlag());
}
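/*
* Example (illustration only) of the 0-slot exception described above:
* suppose the parent slot for this IN covers keys >= "c", so the true
* lower bound of the 0-slot is "c". If the 0-slot key is currently "f"
* and the split supplies "d", updating the slot key to "d" better
* approximates the true lower bound, making isKeyInBounds() more
* effective.
*/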
/**
* Shift entries to the right by one position, starting with (and
* including) the entry at index. Increment nEntries by 1. Called
* in insertEntry1()
*
* @param index - The position to start shifting from.
*/
private void shiftEntriesRight(int index) {
copyEntries(index, index + 1, nEntries - index);
clearEntry(index);
nEntries++;
setDirty(true);
}
/**
* Shift entries starting at the byHowMuch'th element to the left, thus
* removing the first byHowMuch elements of the entries array. This
* always starts at the 0th entry. Caller is responsible for decrementing
* nEntries.
*
* @param byHowMuch - The number of entries to remove from the left side
* of the entries array.
*/
private void shiftEntriesLeft(int byHowMuch) {
copyEntries(byHowMuch, 0, nEntries - byHowMuch);
for (int i = nEntries - byHowMuch; i < nEntries; i++) {
clearEntry(i);
}
setDirty(true);
}
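/*
* Illustration (not part of the original source) of the two shift
* helpers, for a node holding entries [A,B,C,D,E] with nEntries=5:
*
* shiftEntriesRight(2):
* before: [A,B,C,D,E]   nEntries=5
* after:  [A,B,_,C,D,E] nEntries=6 (slot 2 is cleared for the insert)
*
* shiftEntriesLeft(2):
* before: [A,B,C,D,E]   nEntries=5
* after:  [C,D,E,_,_]   nEntries is unchanged here; the caller
* decrements it, as splitInternal does above.
*/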
void adjustCursors(
IN newSibling,
int newSiblingLow,
int newSiblingHigh) {
/* Cursors never refer to IN's. */
}
void adjustCursorsForInsert(int insertIndex) {
/* Cursors never refer to IN's. */
}
/**
* Called prior to changing a slot to contain a different logical node.
*
* Necessary to support assertions for transient LSNs in shouldUpdateLsn.
* Examples: LN slot reuse, and splits where a new node is placed in an
* existing slot.
*
* Also needed to free the off-heap BIN associated with the old node.
*
* TODO: This method is no longer used for LN slot reuse, and freeing of
* the off-heap BIN could be done by the only caller, splitInternal, and
* then this method could be removed.
*/
public void prepareForSlotReuse(int idx) {
if (databaseImpl.isDeferredWriteMode()) {
setLsn(idx, DbLsn.NULL_LSN, false/*check*/);
}
final OffHeapCache ohCache = getOffHeapCache();
if (ohCache.isEnabled() && getNormalizedLevel() == 2) {
ohCache.freeBIN((BIN) getTarget(idx), this, idx);
}
}
/*
* Get the current memory consumption of this node
*/
public long getInMemorySize() {
return inMemorySize;
}
/**
* Compute the current memory consumption of this node, after putting
* its keys in their compact representation, if possible.
*/
private void initMemorySize() {
entryKeys = entryKeys.compact(this);
inMemorySize = computeMemorySize();
}
/**
* Count up the memory usage attributable to this node alone. LN children
* are counted by their BIN parents, but INs are not counted by their
* parents because they are resident on the IN list. The identifierKey is
* intentionally not tracked in the memory budget.
*/
public long computeMemorySize() {
long calcMemorySize = getFixedMemoryOverhead();
calcMemorySize += MemoryBudget.byteArraySize(entryStates.length);
calcMemorySize += computeLsnOverhead();
for (int i = 0; i < nEntries; i++) {
calcMemorySize += getEntryInMemorySize(i);
}
if (keyPrefix != null) {
calcMemorySize += MemoryBudget.byteArraySize(keyPrefix.length);
}
if (provisionalObsolete != null) {
calcMemorySize += provisionalObsolete.getMemorySize();
}
calcMemorySize += entryTargets.calculateMemorySize();
calcMemorySize += entryKeys.calculateMemorySize();
if (offHeapBINIds != null) {
calcMemorySize += offHeapBINIds.getMemorySize();
}
return calcMemorySize;
}
/*
* Overridden by subclasses.
*/
protected long getFixedMemoryOverhead() {
return MemoryBudget.IN_FIXED_OVERHEAD;
}
/*
* Compute the memory consumption for storing this node's LSNs
*/
private int computeLsnOverhead() {
return (entryLsnLongArray == null) ?
MemoryBudget.byteArraySize(entryLsnByteArray.length) :
MemoryBudget.ARRAY_OVERHEAD +
(entryLsnLongArray.length *
MemoryBudget.PRIMITIVE_LONG_ARRAY_ITEM_OVERHEAD);
}
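/*
* Sketch (not part of the original source) of the two LSN storage costs
* computed above, for a node with capacity N:
*
* compact form: MemoryBudget.byteArraySize(4 * N)
* (each LSN is a 1-byte file number offset from baseFileNumber
* plus a 3-byte file offset; see serialize/materialize below)
* long form: MemoryBudget.ARRAY_OVERHEAD +
* N * MemoryBudget.PRIMITIVE_LONG_ARRAY_ITEM_OVERHEAD
*
* setLsnInternal mutates from the compact form to the long form when an
* LSN cannot be represented relative to baseFileNumber.
*/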
private long getEntryInMemorySize(int idx) {
/*
* Do not count state size here, since it is counted as overhead
* during initialization.
*/
long ret = 0;
/*
* Don't count the key size if the representation has already
* accounted for it.
*/
if (!entryKeys.accountsForKeyByteMemUsage()) {
/*
* Materialize the key object only if needed, thus avoiding the
* object allocation cost when possible.
*/
final byte[] key = entryKeys.get(idx);
if (key != null) {
ret += MemoryBudget.byteArraySize(key.length);
}
}
final Node target = entryTargets.get(idx);
if (target != null) {
ret += target.getMemorySizeIncludedByParent();
}
return ret;
}
/**
* Compacts the representation of the IN, if possible.
*
* Called by the evictor to reduce memory usage. Should not be called too
* often (e.g., on every CRUD operation), since this could cause lots of
* memory allocations as the representations contract and expand,
* resulting in expensive GC.
*
* @return number of bytes reclaimed.
*/
public long compactMemory() {
final long oldSize = inMemorySize;
final INKeyRep oldKeyRep = entryKeys;
entryTargets = entryTargets.compact(this);
entryKeys = entryKeys.compact(this);
offHeapBINIds = offHeapBINIds.compact(this, EMPTY_OFFHEAP_BIN_IDS);
/*
* Note that we only need to account for mem usage changes in the key
* rep here, not the target rep. The target rep, unlike the key rep,
* updates its mem usage internally, and the responsibility for mem
* usage of contained nodes is fixed -- it is always managed by the IN.
*
* When the key rep changes, the accountsForKeyByteMemUsage property
* also changes. Recalc the size of the entire IN, because
* responsibility for managing contained key byte mem usage has shifted
* between the key rep and the IN parent.
*/
if (entryKeys != oldKeyRep) {
updateMemorySize(inMemorySize, computeMemorySize());
}
return oldSize - inMemorySize;
}
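/*
* Usage sketch (illustration only; the latching discipline shown is an
* assumption, not a requirement stated here):
*
* in.latch(CacheMode.UNCHANGED);
* try {
*     long reclaimed = in.compactMemory();
*     // a positive value means some memory was reclaimed without
*     // evicting the node
* } finally {
*     in.releaseLatch();
* }
*/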
/**
* Returns the amount of memory currently budgeted for this IN.
*/
public long getBudgetedMemorySize() {
return inMemorySize - accumulatedDelta;
}
/**
* Called as part of a memory budget reset (during a checkpoint) to clear
* the accumulated delta and return the total memory size.
*/
public long resetAndGetMemorySize() {
accumulatedDelta = 0;
return inMemorySize;
}
protected void updateMemorySize(long oldSize, long newSize) {
long delta = newSize - oldSize;
updateMemorySize(delta);
}
/*
* Called when a cached child is replaced by another cached child.
*/
void updateMemorySize(Node oldNode, Node newNode) {
long delta = 0;
if (newNode != null) {
delta = newNode.getMemorySizeIncludedByParent();
}
if (oldNode != null) {
delta -= oldNode.getMemorySizeIncludedByParent();
}
updateMemorySize(delta);
}
/*
* Change this.inMemorySize by the given delta and update the memory
* budget for the cache, but only if the accumulated delta for this
* node exceeds the ACCUMULATED_LIMIT threshold and this IN is actually
* on the IN list. (For example, when we create new INs, they are
* manipulated off the IN list before being added; if we updated the
* environment-wide cache then, we'd end up double counting.)
*/
void updateMemorySize(long delta) {
if (delta == 0) {
return;
}
inMemorySize += delta;
if (getInListResident()) {
/*
* This assertion is disabled if the environment is invalid to
* avoid spurious assertions during testing of IO errors. If the
* environment is invalid, memory budgeting errors are irrelevant.
* [#21929]
*/
assert
inMemorySize >= getFixedMemoryOverhead() ||
!getEnv().isValid():
"delta: " + delta + " inMemorySize: " + inMemorySize +
" overhead: " + getFixedMemoryOverhead() +
" computed: " + computeMemorySize() +
" dump: " + toString() + assertPrintMemorySize();
accumulatedDelta += delta;
if (accumulatedDelta > ACCUMULATED_LIMIT ||
accumulatedDelta < -ACCUMULATED_LIMIT) {
updateMemoryBudget();
}
}
}
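/*
* Illustration (hypothetical numbers): accumulatedDelta batches small
* changes so the environment-wide budget is not updated on every slot
* change. With ACCUMULATED_LIMIT of, say, 1000:
*
* updateMemorySize(400); // accumulatedDelta=400, budget untouched
* updateMemorySize(500); // accumulatedDelta=900, budget untouched
* updateMemorySize(200); // accumulatedDelta=1100 exceeds the limit,
*                        // so updateMemoryBudget() flushes it and
*                        // resets accumulatedDelta to 0
*/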
/**
* Move the accumulated delta to the memory budget.
*/
public void updateMemoryBudget() {
final EnvironmentImpl env = getEnv();
env.getInMemoryINs().memRecalcUpdate(this, accumulatedDelta);
env.getMemoryBudget().updateTreeMemoryUsage(accumulatedDelta);
accumulatedDelta = 0;
}
/*
* Utility method used during unit testing.
*/
protected long printMemorySize() {
final long inOverhead = getFixedMemoryOverhead();
final long statesOverhead =
MemoryBudget.byteArraySize(entryStates.length);
final int lsnOverhead = computeLsnOverhead();
int entryOverhead = 0;
for (int i = 0; i < nEntries; i++) {
entryOverhead += getEntryInMemorySize(i);
}
final int keyPrefixOverhead = (keyPrefix != null) ?
MemoryBudget.byteArraySize(keyPrefix.length) : 0;
final int provisionalOverhead = (provisionalObsolete != null) ?
provisionalObsolete.getMemorySize() : 0;
final long targetRepOverhead = entryTargets.calculateMemorySize();
final long keyRepOverhead = entryKeys.calculateMemorySize();
final long total = inOverhead + statesOverhead + lsnOverhead +
entryOverhead + keyPrefixOverhead + provisionalOverhead +
targetRepOverhead + keyRepOverhead;
final long offHeapBINIdOverhead = offHeapBINIds.getMemorySize();
System.out.println(" nEntries:" + nEntries +
"/" + entryStates.length +
" in: " + inOverhead +
" states: " + statesOverhead +
" entry: " + entryOverhead +
" lsn: " + lsnOverhead +
" keyPrefix: " + keyPrefixOverhead +
" provisional: " + provisionalOverhead +
" targetRep(" + entryTargets.getType() + "): " +
targetRepOverhead +
" keyRep(" + entryKeys.getType() +"): " +
keyRepOverhead +
" offHeapBINIds: " + offHeapBINIdOverhead +
" Total: " + total +
" inMemorySize: " + inMemorySize);
return total;
}
/* Utility method used to print memory size in an assertion. */
private boolean assertPrintMemorySize() {
printMemorySize();
return true;
}
public boolean verifyMemorySize() {
long calcMemorySize = computeMemorySize();
if (calcMemorySize != inMemorySize) {
String msg = "-Warning: Out of sync. Should be " +
calcMemorySize + " / actual: " + inMemorySize +
" node: " + getNodeId();
LoggerUtils.envLogMsg(Level.INFO, getEnv(), msg);
System.out.println(msg);
printMemorySize();
return false;
}
return true;
}
/**
* Adds (increments) or removes (decrements) the cache stats for the key
* and target representations. Used when rep objects are being replaced
* with a new instance, rather than by calling their mutator methods.
* Specifically, it is called when mutating from a full BIN to a
* BIN-delta or vice-versa.
*/
protected void updateRepCacheStats(boolean increment) {
assert(isBIN());
entryKeys.updateCacheStats(increment, this);
entryTargets.updateCacheStats(increment, this);
}
protected int getCompactMaxKeyLength() {
return getEnv().getCompactMaxKeyLength();
}
/**
* Called when adding/removing this IN to/from the INList.
*/
public void setInListResident(boolean resident) {
if (!resident) {
/* Decrement the stats before clearing its residency */
entryTargets.updateCacheStats(false, this);
entryKeys.updateCacheStats(false, this);
}
if (resident) {
flags |= IN_RESIDENT_BIT;
} else {
flags &= ~IN_RESIDENT_BIT;
}
if (resident) {
/* Increment the stats after setting its residency. */
entryTargets.updateCacheStats(true, this);
entryKeys.updateCacheStats(true, this);
}
}
/**
* Returns whether this IN is on the INList.
*/
public boolean getInListResident() {
return (flags & IN_RESIDENT_BIT) != 0;
}
public IN getPrevLRUNode() {
return prevLRUNode;
}
public void setPrevLRUNode(IN node) {
prevLRUNode = node;
}
public IN getNextLRUNode() {
return nextLRUNode;
}
public void setNextLRUNode(IN node) {
nextLRUNode = node;
}
/**
* Try to compact or otherwise reclaim memory in this IN and return the
* number of bytes reclaimed. For example, a BIN should evict LNs, if
* possible.
*
* Used by the evictor to reclaim memory by some means short of evicting
* the entire node. If a positive value is returned, the evictor will
* postpone full eviction of this node.
*/
public long partialEviction() {
return 0;
}
/**
* Returns whether any child is non-null in the main or off-heap cache.
*/
public boolean hasCachedChildren() {
assert isLatchOwner();
for (int i = 0; i < getNEntries(); i++) {
if (entryTargets.get(i) != null) {
return true;
}
}
return false;
}
/**
* Disallow delta on next log. Set to true (a) when we delete a slot
* from a BIN, or (b) when the cleaner marks a BIN as dirty so that it
* will be migrated during the next checkpoint.
*/
public void setProhibitNextDelta(boolean val) {
if (!isBIN()) {
return;
}
if (val) {
flags |= IN_PROHIBIT_NEXT_DELTA_BIT;
} else {
flags &= ~IN_PROHIBIT_NEXT_DELTA_BIT;
}
}
public boolean getProhibitNextDelta() {
return (flags & IN_PROHIBIT_NEXT_DELTA_BIT) != 0;
}
/*
* Validate the subtree that we're about to delete. Make sure there isn't
* more than one valid entry on each IN and that the last level of the
* tree is empty. Also check that there are no cursors on any BINs in
* this subtree. Assumes caller is holding the latch on this parent node.
*
* While we could latch-couple down the tree, rather than hold latches as
* we descend, we are presumably about to delete this subtree so
* concurrency shouldn't be an issue.
*
* @return true if the subtree rooted at the entry specified by "index"
* is ok to delete.
*
* Overridden by the BIN class.
*/
boolean validateSubtreeBeforeDelete(int index)
throws DatabaseException {
if (index >= nEntries) {
/*
* There's no entry here, so of course this entry is deletable.
*/
return true;
} else {
IN child = fetchIN(index, CacheMode.UNCHANGED);
boolean needToLatch = !child.isLatchExclusiveOwner();
try {
if (needToLatch) {
child.latch(CacheMode.UNCHANGED);
}
return child.isValidForDelete();
} finally {
if (needToLatch && child.isLatchOwner()) {
child.releaseLatch();
}
}
}
}
/**
* Check if this node fits the qualifications for being part of a deletable
* subtree. It can only have one IN child and no LN children.
*
* Note: this method is overridden by BIN and LN.
* BIN.isValidForDelete() will not fetch any child LNs.
* LN.isValidForDelete() simply returns false.
*
* We assume that this is only called under an assert.
*/
@Override
boolean isValidForDelete()
throws DatabaseException {
assert(!isBINDelta());
/*
* Can only have one valid child, and that child should be
* deletable.
*/
if (nEntries > 1) { // more than 1 entry.
return false;
} else if (nEntries == 1) { // 1 entry, check child
IN child = fetchIN(0, CacheMode.UNCHANGED);
boolean needToLatch = !child.isLatchExclusiveOwner();
if (needToLatch) {
child.latch(CacheMode.UNCHANGED);
}
boolean ret = false;
try {
if (child.isBINDelta()) {
return false;
}
ret = child.isValidForDelete();
} finally {
if (needToLatch) {
child.releaseLatch();
}
}
return ret;
} else { // 0 entries.
return true;
}
}
/**
* Add self and children to this in-memory IN list. Called by recovery, can
* run with no latching.
*/
@Override
final void rebuildINList(INList inList)
throws DatabaseException {
/*
* Recompute your in memory size first and then add yourself to the
* list.
*/
initMemorySize();
inList.add(this);
boolean hasCachedChildren = false;
/*
* Add your children if they're resident. (LNs know how to stop the
* flow).
*/
for (int i = 0; i < nEntries; i++) {
Node n = getTarget(i);
if (n != null) {
n.rebuildINList(inList);
hasCachedChildren = true;
}
if (getOffHeapBINId(i) >= 0) {
hasCachedChildren = true;
}
}
if (isUpperIN()) {
if (hasCachedChildren) {
setHasCachedChildrenFlag(true);
} else {
setHasCachedChildrenFlag(false);
if (!isDIN()) {
if (traceLRU) {
LoggerUtils.envLogMsg(
traceLevel, getEnv(),
"rebuildINList " +
Thread.currentThread().getId() +
"-" +
Thread.currentThread().getName() +
"-" + getEnv().getName() +
" Adding UIN to LRU: " +
getNodeId());
}
getEvictor().addBack(this);
}
}
} else if (isBIN() && !isDBIN()) {
getEvictor().addBack(this);
}
}
/*
* DbStat support.
*/
void accumulateStats(TreeWalkerStatsAccumulator acc) {
acc.processIN(this, getNodeId(), getLevel());
}
/**
* Sets the last logged LSN, which for a BIN may be a delta.
*
* It is called from IN.postFetch/RecoveryInit(). If the logrec we have
* just read was a BINDelta, this.lastFullVersion has already been set (in
* BINDeltaLogEntry.readMainItem() or in OldBinDelta.reconstituteBIN()).
* So, this method will set this.lastDeltaVersion. Otherwise, if the
* logrec was a full BIN, this.lastFullVersion has not been set yet,
* and it will be set here. In this case, this.lastDeltaVersion will
* remain NULL.
*/
public void setLastLoggedLsn(long lsn) {
if (isBIN()) {
if (getLastFullLsn() == DbLsn.NULL_LSN) {
setLastFullLsn(lsn);
} else {
((BIN)this).setLastDeltaLsn(lsn);
}
} else {
setLastFullLsn(lsn);
}
}
/**
* Returns the LSN of the last logged version of this IN, or NULL_LSN
* if never logged.
*/
public final long getLastLoggedLsn() {
if (isBIN()) {
return (getLastDeltaLsn() != DbLsn.NULL_LSN ?
getLastDeltaLsn() :
getLastFullLsn());
}
return getLastFullLsn();
}
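/*
* Illustration (not part of the original source) of how the last logged
* LSN resolves for a BIN:
*
* lastFullVersion  lastDeltaVersion  getLastLoggedLsn()
* ---------------  ----------------  ------------------
* NULL_LSN         NULL_LSN          NULL_LSN (never logged)
* L1               NULL_LSN          L1 (full BIN logged last)
* L1               L2                L2 (delta logged after full)
*
* For upper INs getLastDeltaLsn() always returns NULL_LSN, so the last
* full version LSN is returned.
*/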
/**
* Sets the last full version LSN.
*/
public final void setLastFullLsn(long lsn) {
lastFullVersion = lsn;
}
/**
* Returns the last full version LSN, or NULL_LSN if never logged.
*/
public final long getLastFullLsn() {
return lastFullVersion;
}
/**
* Returns the last delta version LSN, or NULL_LSN if a delta was not last
* logged. For BINs, it just returns the value of the lastDeltaVersion
* field. Public for unit testing.
*/
public long getLastDeltaLsn() {
return DbLsn.NULL_LSN;
}
/*
* Logging support
*/
/**
* When splits and checkpoints intermingle in a deferred-write database,
* a checkpoint target may appear which has a valid target but a null LSN.
* Deferred write dbs are written out in checkpoint style by either
* Database.sync() or a checkpoint which has cleaned a file containing
* deferred write entries. For example,
* INa
* |
* BINb
*
* A checkpoint or Database.sync starts
* The INList is traversed, dirty nodes are selected
* BINb is bypassed on the INList, since it's not dirty
* BINb is split, creating a new sibling, BINc, and dirtying INa
* INa is selected as a dirty node for the ckpt
*
* If this happens, INa is in the selected dirty set, but not its dirty
* child BINb and new child BINc.
*
* In a durable db, the existence of BINb and BINc are logged
* anyway. But in a deferred write db, there is an entry that points to
* BINc, but no logged version.
*
* This will not cause problems with eviction, because INa can't be
* evicted until BINb and BINc are logged, are non-dirty, and are detached.
* But it can cause problems at recovery, because INa will have a null LSN
* for a valid entry, and the LN children of BINc will not find a home.
*
* To prevent this, search for all dirty children that might have been
* missed during the selection phase, and write them out. It's not
* sufficient to write only null-LSN children, because the existing
* sibling must be logged lest LN children recover twice (once in the
* new sibling, once in the old existing sibling).
*
* TODO:
* Would the problem above be solved by logging dirty nodes using a tree
* traversal (post-order), rather than using the dirty map?
*/
public void logDirtyChildren()
throws DatabaseException {
assert(!isBINDelta());
EnvironmentImpl envImpl = getDatabase().getEnv();
/* Look for targets that are dirty. */
for (int i = 0; i < getNEntries(); i++) {
IN child = (IN) getTarget(i);
if (child != null) {
child.latch(CacheMode.UNCHANGED);
try {
if (child.getDirty()) {
/* Ask descendants to log their children. */
child.logDirtyChildren();
long childLsn =
child.log(false, // allowDeltas
true, // isProvisional
true, // backgroundIO
this); // parent
updateEntry(
i, childLsn, VLSN.NULL_VLSN_SEQUENCE,
0/*lastLoggedSize*/);
}
} finally {
child.releaseLatch();
}
}
}
}
public final long log() {
return logInternal(
this, null, false /*allowDeltas*/, true /*allowCompress*/,
Provisional.NO, false /*backgroundIO*/, null /*parent*/);
}
public final long log(
boolean allowDeltas,
boolean isProvisional,
boolean backgroundIO,
IN parent) {
return logInternal(
this, null, allowDeltas, true /*allowCompress*/,
isProvisional ? Provisional.YES : Provisional.NO,
backgroundIO, parent);
}
public final long log(
boolean allowDeltas,
Provisional provisional,
boolean backgroundIO,
IN parent) {
return logInternal(
this, null, allowDeltas, true /*allowCompress*/, provisional,
backgroundIO, parent);
}
final long optionalLog() {
if (databaseImpl.isDeferredWriteMode()) {
return getLastLoggedLsn();
} else {
return logInternal(
this, null, false /*allowDeltas*/, true /*allowCompress*/,
Provisional.NO, false /*backgroundIO*/, null /*parent*/);
}
}
long optionalLogProvisional(IN parent) {
return optionalLogProvisional(parent, true /*allowCompress*/);
}
long optionalLogProvisionalNoCompress(IN parent) {
return optionalLogProvisional(parent, false /*allowCompress*/);
}
private long optionalLogProvisional(IN parent, boolean allowCompress) {
if (databaseImpl.isDeferredWriteMode()) {
return getLastLoggedLsn();
} else {
return logInternal(
this, null, false /*allowDeltas*/, allowCompress,
Provisional.YES, false /*backgroundIO*/, parent);
}
}
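/*
* Summary sketch (illustration only) of the optionalLog variants: in
* deferred-write mode nothing is written and getLastLoggedLsn() is
* returned unchanged; otherwise:
*
* optionalLog()                            non-provisional, no parent
* optionalLogProvisional(parent)           provisional, compression allowed
* optionalLogProvisionalNoCompress(parent) provisional, compression
*     prohibited (used by splitInternal so each IN keeps at least one
*     entry while being logged)
*/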
public static long logEntry(
INLogEntry<BIN> logEntry,
Provisional provisional,
boolean backgroundIO,
IN parent) {
return logInternal(
null, logEntry, true /*allowDeltas*/, false /*allowCompress*/,
provisional, backgroundIO, parent);
}
/**
* Bottleneck method for all IN logging.
*
* If 'node' is non-null, 'logEntry' must be null.
* If 'node' is null, 'logEntry' and 'parent' must be non-null.
*
* When 'logEntry' is non-null we are logging an off-heap BIN, and it is
* not resident in the main cache. The lastFull/DeltaLsns are not updated
* here, and this must be done instead by the caller.
*
* When 'node' is non-null, 'parent' may or may not be null. It must be
* non-null when logging provisionally, since obsolete LSNs are added to
* the parent's collection.
*/
private static long logInternal(
final IN node,
INLogEntry<?> logEntry,
final boolean allowDeltas,
final boolean allowCompress,
final Provisional provisional,
final boolean backgroundIO,
final IN parent) {
assert node == null || node.isLatchExclusiveOwner();
assert parent == null || parent.isLatchExclusiveOwner();
assert node != null || parent != null;
assert (node == null) != (logEntry == null);
final DatabaseImpl dbImpl =
(node != null) ? node.getDatabase() : parent.getDatabase();
final EnvironmentImpl envImpl = dbImpl.getEnv();
final boolean countObsoleteNow =
provisional != Provisional.YES || dbImpl.isTemporary();
final boolean isBin = (node != null) ?
node.isBIN() : (parent.getNormalizedLevel() == 2);
final BIN bin = (node != null && isBin) ? ((BIN) node) : null;
final boolean isDelta;
if (isBin) {
if (logEntry != null) {
/*
* When a logEntry is supplied (node/bin are null), the logic
* below is implemented by OffHeapCache.createBINLogEntry.
*/
isDelta = logEntry.isBINDelta();
} else {
/* Compress non-dirty slots before determining delta status. */
if (allowCompress) {
envImpl.lazyCompress(bin, false /*compressDirtySlots*/);
}
isDelta = bin.isBINDelta() ||
(allowDeltas && bin.shouldLogDelta());
/* Be sure that we didn't illegally mutate to a delta. */
assert (!(isDelta && bin.isDeltaProhibited()));
/* Also compress dirty slots, if we will not log a delta. */
if (allowCompress && !isDelta) {
envImpl.lazyCompress(bin, true /*compressDirtySlots*/);
}
/*
* Write dirty LNs in deferred-write databases after
* compression to reduce total logging, at least for temp DBs.
*/
if (dbImpl.isDeferredWriteMode()) {
bin.logDirtyChildren();
}
logEntry = isDelta ?
(new BINDeltaLogEntry(bin)) :
(new INLogEntry<>(bin));
}
} else {
assert node != null;
isDelta = false;
logEntry = new INLogEntry<>(node);
}
final LogParams params = new LogParams();
params.entry = logEntry;
params.provisional = provisional;
params.repContext = ReplicationContext.NO_REPLICATE;
params.nodeDb = dbImpl;
params.backgroundIO = backgroundIO;
/*
* For delta logging:
* + Count lastDeltaVersion obsolete, if non-null.
* + Set lastDeltaVersion to newly logged LSN.
* + Leave lastFullVersion unchanged.
*
* For full version logging:
* + Count lastFullVersion and lastDeltaVersion obsolete, if non-null.
* + Set lastFullVersion to newly logged LSN.
* + Set lastDeltaVersion to null.
*/
final long oldLsn =
isDelta ? DbLsn.NULL_LSN : logEntry.getPrevFullLsn();
final long auxOldLsn = logEntry.getPrevDeltaLsn();
/*
* Determine whether to count the prior version of an IN (as well as
* accumulated provisionally obsolete LSNs for child nodes) obsolete
* when logging the new version.
*
* True is set if we are logging the IN non-provisionally, since the
* non-provisional version durably replaces the prior version and
* causes all provisional children to also become durable.
*
* True is also set if the database is temporary. Since we never use a
* temporary DB past recovery, prior versions of an IN are never used.
* [#16928]
*/
if (countObsoleteNow) {
params.oldLsn = oldLsn;
params.auxOldLsn = auxOldLsn;
params.packedObsoleteInfo =
(node != null) ? node.provisionalObsolete : null;
}
/* Log it. */
final LogItem item = envImpl.getLogManager().log(params);
if (node != null) {
node.setDirty(false);
}
if (countObsoleteNow) {
if (node != null) {
node.discardProvisionalObsolete();
}
} else if (parent != null) {
parent.trackProvisionalObsolete(node, oldLsn);
parent.trackProvisionalObsolete(node, auxOldLsn);
/*
* TODO:
* The parent is null and provisional is YES when evicting the root
* of a DW DB. How does obsolete counting happen?
*/
}
if (bin != null) {
/*
* When a logEntry is supplied (node/bin are null), the logic
* below is implemented by OffHeapCache.postBINLog.
*/
if (isDelta) {
bin.setLastDeltaLsn(item.lsn);
} else {
bin.setLastFullLsn(item.lsn);
bin.setLastDeltaLsn(DbLsn.NULL_LSN);
}
bin.setProhibitNextDelta(false);
} else if (node != null) {
node.setLastFullLsn(item.lsn);
}
final Evictor evictor = envImpl.getEvictor();
if (node != null && evictor.getUseDirtyLRUSet()) {
/*
* To capture all cases where a node needs to be moved to the
* priority-1 LRUSet after being cleaned, we invoke moveToPri1LRU()
* from IN.afterLog(). This includes the case where the node is
* being logged as part of being evicted, in which case we don't
* really want it to go back to the LRU. However, this is ok
* because moveToPri1LRU() checks whether the node is actually
* in the priority-2 LRUSet before moving it to the priority-1
* LRUSet.
*/
if (traceLRU && node.isUpperIN()) {
LoggerUtils.envLogMsg(
traceLevel, envImpl,
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() +
"-" + envImpl.getName() +
" afterLogCommon(): " +
" Moving UIN to mixed LRU: " + node.getNodeId());
}
evictor.moveToPri1LRU(node);
}
return item.lsn;
}
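/*
* Decision sketch (illustration, restating the method above) for logging
* a resident BIN ('node' non-null and isBIN):
*
* 1. If allowed, lazy-compress non-dirty slots.
* 2. isDelta = the BIN is already a delta, or deltas are allowed and
*    shouldLogDelta() returns true.
* 3. If a full BIN will be logged, also compress dirty slots.
* 4. In deferred-write mode, log dirty LN children first.
* 5. Log a BINDeltaLogEntry or INLogEntry, count prior versions obsolete
*    if countObsoleteNow, then update lastFullVersion/lastDeltaVersion
*    as described above.
*/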
/**
* Adds the given obsolete LSN and any tracked obsolete LSNs for the given
* child IN to this IN's tracking list. This method is called to track
* obsolete LSNs when a child IN is logged provisionally. Such LSNs
* cannot be considered obsolete until an ancestor IN is logged
* non-provisionally.
*/
void trackProvisionalObsolete(final IN childIN, final long obsoleteLsn) {
final boolean moveChildInfo =
(childIN != null && childIN.provisionalObsolete != null);
final boolean addChildLsn = (obsoleteLsn != DbLsn.NULL_LSN);
if (!moveChildInfo && !addChildLsn) {
return;
}
final int oldMemSize = (provisionalObsolete != null) ?
provisionalObsolete.getMemorySize() : 0;
if (moveChildInfo) {
if (provisionalObsolete != null) {
/* Append child info to parent info. */
provisionalObsolete.copyObsoleteInfo
(childIN.provisionalObsolete);
} else {
/* Move reference from child to parent. */
provisionalObsolete = childIN.provisionalObsolete;
}
childIN.updateMemorySize(
0 - childIN.provisionalObsolete.getMemorySize());
childIN.provisionalObsolete = null;
}
if (addChildLsn) {
if (provisionalObsolete == null) {
provisionalObsolete = new PackedObsoleteInfo();
}
provisionalObsolete.addObsoleteInfo(obsoleteLsn);
}
updateMemorySize(oldMemSize,
(provisionalObsolete != null) ?
provisionalObsolete.getMemorySize() :
0);
}
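/*
* Usage sketch (illustration only): during a split the children are
* logged provisionally and the parent accumulates their obsolete LSNs:
*
* parent.trackProvisionalObsolete(child, childOldLsn);
*
* When an ancestor is later logged non-provisionally, logInternal passes
* the accumulated info via params.packedObsoleteInfo and then calls
* discardProvisionalObsolete().
*/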
/**
* Discards the provisional obsolete tracking information in this node
* after it has been counted in the live tracker. This method is called
* after this node is logged non-provisionally.
*/
private void discardProvisionalObsolete()
throws DatabaseException {
if (provisionalObsolete != null) {
updateMemorySize(0 - provisionalObsolete.getMemorySize());
provisionalObsolete = null;
}
}
/*
* NOOP for upper INs. Overridden by the BIN class.
*/
public void mutateToFullBIN(boolean leaveFreeSlot) {
}
/**
* Only write dirty slots when logging a BIN-delta.
*
* Don't write slots that are extinct when logging a full BIN, to support
* erasure of extinct records. Skipping the extinct slots during logging
* is necessary because extinct slots cannot be deleted by BIN.compress
* when a cursor is present.
*
* Otherwise, when logging an IN, write all slots.
*/
private int getNEntriesToWrite(boolean deltasOnly) {
if (deltasOnly) {
return getNDeltas();
}
if (!isBIN()) {
return nEntries;
}
int n = 0;
for (int i = 0; i < nEntries; i++) {
if (canDeleteExtinctSlot(i)) {
continue;
}
n += 1;
}
return n;
}
/**
* Returns whether the given slot is extinct and can be deleted during
* logging.
*
* Check KnownDeleted before checking for an extinct slot, to
* reduce calls to the more expensive getExtinctionState method.
* KnownDeleted is set on extinct slots by the ExtinctionScanner.
*/
private boolean canDeleteExtinctSlot(int idx) {
assert isBIN();
final EnvironmentImpl envImpl = getEnv();
/*
* The check for isValid is necessary because we cannot call the
* extinction filter until recovery is complete and we have the DB
* name and type [#27022]. In the future we may wish to implement a
* more comprehensive check in getExtinctionState.
*/
return envImpl.isValid() &&
isEntryKnownDeleted(idx) &&
envImpl.getExtinctionState(databaseImpl, getKey(idx)) == EXTINCT;
}
public final int getNDeltas() {
int n = 0;
for (int i = 0; i < nEntries; i++) {
if (!isDirty(i)) {
continue;
}
n += 1;
}
return n;
}
/**
* @see Node#getGenericLogType
*/
@Override
public final LogEntryType getGenericLogType() {
return getLogType();
}
/**
* Get the log type of this node.
*/
public LogEntryType getLogType() {
return LogEntryType.LOG_IN;
}
/**
* @see Loggable#getLogSize
*
* Overridden by the DIN and DBIN classes.
*/
@Override
public int getLogSize() {
return getLogSize(false);
}
public final int getLogSize(boolean deltasOnly) {
BIN bin = (isBIN() ? (BIN)this : null);
boolean haveVLSNCache = (bin != null && bin.isVLSNCachingEnabled());
int size = 0;
boolean haveExpiration = false;
if (bin != null) {
int base = bin.getExpirationBase();
haveExpiration = (base != -1);
size += LogUtils.getPackedIntLogSize(base);
}
size += LogUtils.getPackedLongLogSize(nodeId);
size += LogUtils.getByteArrayLogSize(identifierKey); // identifier key
if (keyPrefix != null) {
size += LogUtils.getByteArrayLogSize(keyPrefix);
}
size += 1; // one byte for boolean flags
final int nEntriesToWrite = getNEntriesToWrite(deltasOnly);
final int maxEntriesToWrite =
(!deltasOnly ?
getMaxEntries() :
bin.getDeltaCapacity(nEntriesToWrite));
size += LogUtils.getPackedIntLogSize(nEntriesToWrite);
size += LogUtils.getPackedIntLogSize(level);
size += LogUtils.getPackedIntLogSize(maxEntriesToWrite);
final boolean compactLsnsRep = (entryLsnLongArray == null);
size += LogUtils.getBooleanLogSize(); // compactLsnsRep
if (compactLsnsRep) {
size += LogUtils.INT_BYTES; // baseFileNumber
}
for (int i = 0; i < nEntries; i++) { // entries
/* See getNEntriesToWrite and canDeleteExtinctSlot. */
if (deltasOnly) {
if (!isDirty(i)) {
continue;
}
} else {
if (bin != null && canDeleteExtinctSlot(i)) {
continue;
}
}
size += LogUtils.getByteArrayLogSize(entryKeys.get(i)) + // key
(compactLsnsRep ? LogUtils.INT_BYTES :
LogUtils.getLongLogSize()) + // LSN
1; // state
if (isLastLoggedSizeStored(i)) {
size += LogUtils.getPackedIntLogSize(getLastLoggedSize(i));
}
if (haveVLSNCache && isEmbeddedLN(i)) {
size += LogUtils.getPackedLongLogSize(bin.getCachedVLSN(i));
}
if (haveExpiration) {
size +=
LogUtils.getPackedIntLogSize(bin.getExpirationOffset(i));
}
}
if (deltasOnly) {
size += LogUtils.getPackedIntLogSize(bin.getFullBinNEntries());
size += LogUtils.getPackedIntLogSize(bin.getFullBinMaxEntries());
size += bin.getBloomFilterLogSize();
}
return size;
}
/*
* Overridden by DIN and DBIN classes.
*/
@Override
public void writeToLog(ByteBuffer logBuffer) {
serialize(logBuffer, false /*deltasOnly*/, true /*clearDirtyBits*/);
}
public void writeToLog(ByteBuffer logBuffer, boolean deltasOnly) {
serialize(logBuffer, deltasOnly, !deltasOnly /*clearDirtyBits*/);
}
/**
* WARNING: In the case of BINs this method is not only used for logging
* but also for off-heap caching. Therefore, this method should not have
* side effects unless the clearDirtyBits param is true.
*/
public final void serialize(ByteBuffer logBuffer,
boolean deltasOnly,
boolean clearDirtyBits) {
assert(!deltasOnly || isBIN());
BIN bin = (isBIN() ? (BIN)this : null);
byte[] bloomFilter = (deltasOnly ? bin.createBloomFilter() : null);
boolean haveExpiration = false;
if (bin != null) {
int base = bin.getExpirationBase();
haveExpiration = (base != -1);
LogUtils.writePackedInt(logBuffer, base);
}
LogUtils.writePackedLong(logBuffer, nodeId);
LogUtils.writeByteArray(logBuffer, identifierKey);
boolean hasKeyPrefix = (keyPrefix != null);
boolean mayHaveLastLoggedSize = mayHaveLastLoggedSizeStored();
boolean haveVLSNCache = (bin != null && bin.isVLSNCachingEnabled());
byte booleans = (byte) (isRoot() ? 1 : 0);
booleans |= (hasKeyPrefix ? 2 : 0);
booleans |= (mayHaveLastLoggedSize ? 4 : 0);
booleans |= (bloomFilter != null ? 8 : 0);
booleans |= (haveVLSNCache ? 16 : 0);
booleans |= (isExpirationInHours() ? 32 : 0);
logBuffer.put(booleans);
if (hasKeyPrefix) {
LogUtils.writeByteArray(logBuffer, keyPrefix);
}
final int nEntriesToWrite = getNEntriesToWrite(deltasOnly);
final int maxEntriesToWrite =
(!deltasOnly ?
getMaxEntries() :
bin.getDeltaCapacity(nEntriesToWrite));
/*
if (deltasOnly) {
BIN bin = (BIN)this;
System.out.println(
"Logging BIN-delta: " + getNodeId() +
" is delta = " + isBINDelta() +
" nEntries = " + nEntriesToWrite +
" max entries = " + maxEntriesToWrite +
" full BIN entries = " + bin.getFullBinNEntries() +
" full BIN max entries = " + bin.getFullBinMaxEntries());
}
*/
LogUtils.writePackedInt(logBuffer, nEntriesToWrite);
LogUtils.writePackedInt(logBuffer, level);
LogUtils.writePackedInt(logBuffer, maxEntriesToWrite);
/* true if compact representation. */
boolean compactLsnsRep = (entryLsnLongArray == null);
LogUtils.writeBoolean(logBuffer, compactLsnsRep);
if (compactLsnsRep) {
LogUtils.writeInt(logBuffer, (int) baseFileNumber);
}
for (int i = 0; i < nEntries; i++) {
/* See getNEntriesToWrite and canDeleteExtinctSlot. */
if (deltasOnly) {
if (!isDirty(i)) {
continue;
}
} else {
if (bin != null && canDeleteExtinctSlot(i)) {
continue;
}
}
LogUtils.writeByteArray(logBuffer, entryKeys.get(i));
/*
* A NULL_LSN may be stored when an incomplete insertion occurs,
* but in that case the KnownDeleted flag must be set. See
* Tree.insert. [#13126]
*/
assert checkForNullLSN(i) :
"logging IN " + getNodeId() + " with null lsn child " +
" db=" + databaseImpl.getName() +
" isDeferredWriteMode=" + databaseImpl.isDeferredWriteMode() +
" isTemporary=" + databaseImpl.isTemporary();
if (compactLsnsRep) {
int offset = i << 2;
int fileOffset = getFileOffset(offset);
logBuffer.put(getFileNumberOffset(offset));
logBuffer.put((byte) (fileOffset & 0xff));
logBuffer.put((byte) ((fileOffset >>> 8) & 0xff));
logBuffer.put((byte) ((fileOffset >>> 16) & 0xff));
} else {
LogUtils.writeLong(logBuffer, entryLsnLongArray[i]);
}
logBuffer.put(
(byte) (entryStates[i] & EntryStates.CLEAR_TRANSIENT_BITS));
if (clearDirtyBits) {
entryStates[i] &= EntryStates.CLEAR_DIRTY_BIT;
}
if (isLastLoggedSizeStored(i)) {
LogUtils.writePackedInt(logBuffer, getLastLoggedSize(i));
}
if (haveVLSNCache && isEmbeddedLN(i)) {
LogUtils.writePackedLong(logBuffer, bin.getCachedVLSN(i));
}
if (haveExpiration) {
LogUtils.writePackedInt(
logBuffer, bin.getExpirationOffset(i));
}
}
if (deltasOnly) {
LogUtils.writePackedInt(logBuffer, bin.getFullBinNEntries());
LogUtils.writePackedInt(logBuffer, bin.getFullBinMaxEntries());
if (bloomFilter != null) {
BINDeltaBloomFilter.writeToLog(bloomFilter, logBuffer);
}
}
}
/*
* Used for assertion to prevent writing a null lsn to the log.
*/
private boolean checkForNullLSN(int index) {
boolean ok;
if (isBIN()) {
ok = !(getLsn(index) == DbLsn.NULL_LSN &&
(entryStates[index] & EntryStates.KNOWN_DELETED_BIT) == 0);
} else {
ok = (getLsn(index) != DbLsn.NULL_LSN);
}
return ok;
}
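/*
* Worked example (illustration only) of the compact LSN encoding used by
* serialize/materialize: with baseFileNumber=100, an entry LSN of
* file=102, offset=0x012345 is written as 4 bytes:
*
* byte 0: fileNumberOffset = 102 - 100 = 2
* byte 1: 0x45 (offset bits 0..7)
* byte 2: 0x23 (offset bits 8..15)
* byte 3: 0x01 (offset bits 16..23)
*
* A 3-byte offset equal to THREE_BYTE_NEGATIVE_ONE denotes NULL_LSN.
*/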
/**
* Returns whether the given serialized IN is a BIN that may have
* expiration values.
*/
public boolean mayHaveExpirationValues(
ByteBuffer itemBuffer,
int entryVersion) {
if (!isBIN() || entryVersion < 12) {
return false;
}
itemBuffer.mark();
int expirationBase = LogUtils.readPackedInt(itemBuffer);
itemBuffer.reset();
return (expirationBase != -1);
}
@Override
public void readFromLog(
ByteBuffer itemBuffer,
int entryVersion) {
materialize(
itemBuffer, entryVersion,
false /*deltasOnly*/, true /*clearDirtyBits*/);
}
public void readFromLog(
ByteBuffer itemBuffer,
int entryVersion,
boolean deltasOnly) {
materialize(
itemBuffer, entryVersion,
deltasOnly, !deltasOnly /*clearDirtyBits*/);
}
/**
* WARNING: In the case of BINs this method is used not only for logging
* but also for off-heap caching. Therefore, this method should not have
* side effects unless the clearDirtyBits param is true or an older log
* version is passed (off-heap caching uses the current version).
*/
public final void materialize(
ByteBuffer itemBuffer,
int entryVersion,
boolean deltasOnly,
boolean clearDirtyBits) {
assert(!deltasOnly || isBIN());
BIN bin = (isBIN() ? (BIN)this : null);
boolean unpacked = (entryVersion < 6);
boolean haveExpiration = false;
if (bin != null && entryVersion >= 12) {
int base = LogUtils.readPackedInt(itemBuffer);
haveExpiration = (base != -1);
bin.setExpirationBase(base);
}
nodeId = LogUtils.readLong(itemBuffer, unpacked);
identifierKey = LogUtils.readByteArray(itemBuffer, unpacked);
byte booleans = itemBuffer.get();
setIsRootFlag((booleans & 1) != 0);
if ((booleans & 2) != 0) {
keyPrefix = LogUtils.readByteArray(itemBuffer, unpacked);
}
boolean mayHaveLastLoggedSize = ((booleans & 4) != 0);
assert !(mayHaveLastLoggedSize && (entryVersion < 9));
boolean hasBloomFilter = ((booleans & 8) != 0);
assert(!hasBloomFilter || (entryVersion >= 10 && deltasOnly));
boolean haveVLSNCache = ((booleans & 16) != 0);
assert !(haveVLSNCache && (entryVersion < 11));
setExpirationInHours((booleans & 32) != 0);
nEntries = LogUtils.readInt(itemBuffer, unpacked);
level = LogUtils.readInt(itemBuffer, unpacked);
int length = LogUtils.readInt(itemBuffer, unpacked);
entryTargets = INTargetRep.NONE;
entryKeys = new INKeyRep.Default(length);
baseFileNumber = -1;
long storedBaseFileNumber = -1;
if (disableCompactLsns) {
entryLsnByteArray = null;
entryLsnLongArray = new long[length];
} else {
entryLsnByteArray = new byte[length << 2];
entryLsnLongArray = null;
}
entryStates = new byte[length];
boolean compactLsnsRep = false;
if (entryVersion > 1) {
compactLsnsRep = LogUtils.readBoolean(itemBuffer);
if (compactLsnsRep) {
baseFileNumber = LogUtils.readInt(itemBuffer);
storedBaseFileNumber = baseFileNumber;
}
}
for (int i = 0; i < nEntries; i++) {
entryKeys = entryKeys.set(
i, LogUtils.readByteArray(itemBuffer, unpacked), this);
long lsn;
if (compactLsnsRep) {
/* LSNs in compact form. */
byte fileNumberOffset = itemBuffer.get();
int fileOffset = (itemBuffer.get() & 0xff);
fileOffset |= ((itemBuffer.get() & 0xff) << 8);
fileOffset |= ((itemBuffer.get() & 0xff) << 16);
if (fileOffset == THREE_BYTE_NEGATIVE_ONE) {
lsn = DbLsn.NULL_LSN;
} else {
lsn = DbLsn.makeLsn
(storedBaseFileNumber + fileNumberOffset, fileOffset);
}
} else {
/* LSNs in long form. */
lsn = LogUtils.readLong(itemBuffer); // LSN
}
setLsnInternal(i, lsn);
byte entryState = itemBuffer.get(); // state
if (clearDirtyBits) {
entryState &= EntryStates.CLEAR_DIRTY_BIT;
}
/*
* The MIGRATE_BIT (now the transient OFFHEAP_DIRTY_BIT) was
* accidentally written in a pre-JE 6 log version.
*/
if (entryVersion < 9) {
entryState &= EntryStates.CLEAR_TRANSIENT_BITS;
}
/*
* A NULL_LSN is the remnant of an incomplete insertion and the
* KnownDeleted flag should be set. But because of bugs in prior
* releases, the KnownDeleted flag may not be set. So set it here.
* See Tree.insert. [#13126]
*/
if (entryVersion < 9 && lsn == DbLsn.NULL_LSN) {
entryState |= EntryStates.KNOWN_DELETED_BIT;
}
entryStates[i] = entryState;
if (mayHaveLastLoggedSize && !isEmbeddedLN(i)) {
setLastLoggedSizeUnconditional(
i, LogUtils.readPackedInt(itemBuffer));
}
if (haveVLSNCache && isEmbeddedLN(i)) {
bin.setCachedVLSNUnconditional(
i, LogUtils.readPackedLong(itemBuffer));
}
if (haveExpiration) {
bin.setExpirationOffset(i, LogUtils.readPackedInt(itemBuffer));
}
}
if (deltasOnly) {
setBINDelta(true);
if (entryVersion >= 10) {
bin.setFullBinNEntries(LogUtils.readPackedInt(itemBuffer));
bin.setFullBinMaxEntries(LogUtils.readPackedInt(itemBuffer));
if (hasBloomFilter) {
bin.bloomFilter = BINDeltaBloomFilter.readFromLog(
itemBuffer, entryVersion);
}
}
}
/* Dup conversion will be done by postFetchInit. */
needDupKeyConversion = (entryVersion < 8);
}
/**
* @see Loggable#logicalEquals
* Always returns false; this item should never be compared.
*/
@Override
public final boolean logicalEquals(Loggable other) {
return false;
}
/**
* @see Loggable#dumpLog
*/
@Override
public final void dumpLog(StringBuilder sb, boolean verbose) {
sb.append(beginTag());
sb.append("<nodeId val=\"");
sb.append(nodeId);
sb.append("\"/>");
sb.append(Key.dumpString(identifierKey, "idKey", 0));
// isRoot
sb.append("<isRoot val=\"");
sb.append(isRoot());
sb.append("\"/>");
// level
sb.append("<level val=\"");
sb.append(Integer.toHexString(level));
sb.append("\"/>");
if (keyPrefix != null) {
sb.append(Key.dumpString(keyPrefix, "keyPrefix", 0));
}
// nEntries, length of entries array
sb.append("<entries numEntries=\"");
sb.append(nEntries);
sb.append("\" length=\"");
sb.append(getMaxEntries());
final BIN bin = isBIN() ? (BIN) this : null;
if (isBINDelta(false)) {
sb.append("\" numFullBinEntries=\"");
sb.append(bin.getFullBinNEntries());
sb.append("\" maxFullBinEntries=\"");
sb.append(bin.getFullBinMaxEntries());
}
boolean compactLsnsRep = (entryLsnLongArray == null);
if (compactLsnsRep) {
sb.append("\" baseFileNumber=\"");
sb.append(baseFileNumber);
}
if (bin != null && bin.getExpirationBase() != -1) {
sb.append("\" baseExpiration=\"");
sb.append(bin.getExpirationBase());
}
sb.append("\">");
if (verbose) {
for (int i = 0; i < nEntries; i++) {
sb.append("<ref");
dumpSlotState(sb, i, bin);
sb.append(">");
sb.append(Key.dumpString(getKey(i), 0));
if (isEmbeddedLN(i)) {
sb.append(Key.dumpString(getData(i), "data", 0));
}
sb.append(DbLsn.toString(getLsn(i)));
sb.append("</ref>");
}
}
sb.append("</entries>");
if (isBINDelta(false)) {
if (bin.bloomFilter != null) {
BINDeltaBloomFilter.dumpLog(bin.bloomFilter, sb, verbose);
}
}
/* Add on any additional items from subclasses before the end tag. */
dumpLogAdditional(sb);
sb.append(endTag());
}
/**
* Allows subclasses to add additional fields before the end tag. If they
* just overload dumpLog, the xml isn't nested.
*/
protected void dumpLogAdditional(StringBuilder sb) {
}
public String beginTag() {
return BEGIN_TAG;
}
public String endTag() {
return END_TAG;
}
/**
* For unit test support:
* @return a string that dumps information about this IN.
*/
@Override
public String dumpString(int nSpaces, boolean dumpTags) {
StringBuilder sb = new StringBuilder();
if (dumpTags) {
sb.append(TreeUtils.indent(nSpaces));
sb.append(beginTag());
sb.append('\n');
}
if (dumpTags) {
sb.append(TreeUtils.indent(nSpaces));
sb.append("<nodeId val=\"");
sb.append(nodeId);
sb.append("\"/>");
} else {
sb.append(nodeId);
}
sb.append('\n');
BIN bin = null;
if (isBIN()) {
bin = (BIN) this;
}
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<idkey>");
sb.append(identifierKey == null ?
"" :
Key.dumpString(identifierKey, 0));
sb.append("</idkey>");
sb.append('\n');
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<prefix>");
sb.append(keyPrefix == null ? "" : Key.dumpString(keyPrefix, 0));
sb.append("</prefix>\n");
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<dirty val=\"").append(getDirty()).append("\"/>");
sb.append('\n');
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<level val=\"");
sb.append(Integer.toHexString(level)).append("\"/>");
sb.append('\n');
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<isRoot val=\"").append(isRoot()).append("\"/>");
sb.append('\n');
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<isBINDelta val=\"").append(isBINDelta(false)).append("\"/>");
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append(
"<prohibitNextDelta val=\"").
append(getProhibitNextDelta()).append("\"/>");
if (bin != null) {
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<cursors val=\"").append(bin.nCursors()).append("\"/>");
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<deltas val=\"").append(bin.getNDeltas()).append("\"/>");
}
sb.append('\n');
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("<entries nEntries=\"");
sb.append(nEntries);
sb.append("\">");
sb.append('\n');
for (int i = 0; i < nEntries; i++) {
sb.append(TreeUtils.indent(nSpaces + 4));
sb.append("<entry id=\"").append(i).append("\"");
dumpSlotState(sb, i, bin);
sb.append(">\n");
if (getLsn(i) == DbLsn.NULL_LSN) {
sb.append(TreeUtils.indent(nSpaces + 6));
sb.append("<lsn/>");
} else {
sb.append(DbLsn.dumpString(getLsn(i), nSpaces + 6));
}
sb.append('\n');
if (entryKeys.get(i) == null) {
sb.append(TreeUtils.indent(nSpaces + 6));
sb.append("<key/>");
} else {
sb.append(Key.dumpString(entryKeys.get(i), (nSpaces + 6)));
}
sb.append('\n');
if (getOffHeapBINId(i) >= 0) {
sb.append("<ohBIN id=\"").append(i).append("\" val=\"");
sb.append(getOffHeapBINId(i)).append("\"/>\n");
}
if (bin != null && bin.getOffHeapLNId(i) != 0) {
sb.append("<ohLN id=\"").append(i).append("\" val=\"");
sb.append(bin.getOffHeapLNId(i)).append("\"/>\n");
}
if (entryTargets.get(i) == null) {
sb.append(TreeUtils.indent(nSpaces + 6));
sb.append("<target/>");
} else {
sb.append(entryTargets.get(i).dumpString(nSpaces + 6, true));
}
sb.append('\n');
sb.append(TreeUtils.indent(nSpaces + 4));
sb.append("</entry>");
sb.append('\n');
}
sb.append(TreeUtils.indent(nSpaces + 2));
sb.append("</entries>");
sb.append('\n');
if (dumpTags) {
sb.append(TreeUtils.indent(nSpaces));
sb.append(endTag());
}
return sb.toString();
}
private void dumpSlotState(StringBuilder sb, int i, BIN bin) {
sb.append(" kd=\"").append(isEntryKnownDeleted(i));
sb.append("\" pd=\"").append(isEntryPendingDeleted(i));
sb.append("\" dirty=\"").append(isDirty(i));
sb.append("\" embedded=\"").append(isEmbeddedLN(i));
sb.append("\" noData=\"").append(isNoDataLN(i));
if (bin != null) {
sb.append("\" logSize=\"");
sb.append(bin.getLastLoggedSizeUnconditional(i));
long vlsn = bin.getCachedVLSN(i);
if (!VLSN.isNull(vlsn)) {
sb.append("\" vlsn=\"").append(vlsn);
}
}
if (bin != null && bin.getExpiration(i) != 0) {
sb.append("\" expires=\"");
sb.append(TTL.formatExpiration(
bin.getExpiration(i), bin.isExpirationInHours()));
}
sb.append("\"");
}
/**
* Converts to an identifying string that is safe to output in a log.
* Keys are not included for security/privacy reasons.
*/
public String toSafeString(final int... slotIndexes) {
final BIN bin = isBIN() ? (BIN) this : null;
final StringBuilder sb = new StringBuilder();
sb.append("IN nodeId=").append(getNodeId());
sb.append(" lastLoggedLSN=");
sb.append(DbLsn.getNoFormatString(getLastLoggedLsn()));
sb.append(" lastFullLSN=");
sb.append(DbLsn.getNoFormatString(getLastFullLsn()));
sb.append(" level=").append(Integer.toHexString(getLevel()));
sb.append(" flags=").append(Integer.toHexString(flags));
sb.append(" isBINDelta=").append(isBINDelta());
sb.append(" nSlots=").append(getNEntries());
if (slotIndexes != null) {
for (final int i : slotIndexes) {
sb.append(" slot-").append(i).append(":[");
sb.append("lsn=");
sb.append(DbLsn.getNoFormatString(getLsn(i)));
sb.append(" offset=");
sb.append(DbLsn.getFileOffset(getLsn(i)));
if (bin != null) {
sb.append(" offset+logSize=");
sb.append(DbLsn.getFileOffset(getLsn(i)) +
bin.getLastLoggedSizeUnconditional(i));
}
dumpSlotState(sb, i, bin);
sb.append("]");
}
}
return sb.toString();
}
@Override
public String toString() {
return dumpString(0, true);
}
public String shortClassName() {
return "IN";
}
/**
* Send trace messages to the java.util.logger. Don't rely on the logger
* alone to conditionalize whether we send this message; we don't even want
* to construct the message if the level is not enabled.
*/
private void traceSplit(Level level,
IN parent,
IN newSibling,
long parentLsn,
long myNewLsn,
long newSiblingLsn,
int splitIndex,
int idKeyIndex,
int childIndex) {
Logger logger = getEnv().getLogger();
if (logger.isLoggable(level)) {
StringBuilder sb = new StringBuilder();
sb.append(TRACE_SPLIT);
sb.append(" parent=");
sb.append(parent.getNodeId());
sb.append(" child=");
sb.append(getNodeId());
sb.append(" newSibling=");
sb.append(newSibling.getNodeId());
sb.append(" parentLsn = ");
sb.append(DbLsn.getNoFormatString(parentLsn));
sb.append(" childLsn = ");
sb.append(DbLsn.getNoFormatString(myNewLsn));
sb.append(" newSiblingLsn = ");
sb.append(DbLsn.getNoFormatString(newSiblingLsn));
sb.append(" splitIdx=");
sb.append(splitIndex);
sb.append(" idKeyIdx=");
sb.append(idKeyIndex);
sb.append(" childIdx=");
sb.append(childIndex);
LoggerUtils.logMsg(logger,
databaseImpl.getEnv(),
level,
sb.toString());
}
}
/**
* Send trace messages to the java.util.logger. Don't rely on the logger
* alone to conditionalize whether we send this message; we don't even want
* to construct the message if the level is not enabled.
*/
private void traceDelete(Level level, int index) {
Logger logger = databaseImpl.getEnv().getLogger();
if (logger.isLoggable(level)) {
StringBuilder sb = new StringBuilder();
sb.append(TRACE_DELETE);
sb.append(" in=").append(getNodeId());
sb.append(" index=");
sb.append(index);
LoggerUtils.logMsg(logger,
databaseImpl.getEnv(),
level,
sb.toString());
}
}
public final void setFetchINHook(TestHook hook) {
fetchINHook = hook;
}
}