blob: 7e9ecb54deb08fd20f6240cfe19da43c2f68d1ae [file] [log] [blame]
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.tree;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.latch.LatchContext;
import com.sleepycat.je.latch.LatchFactory;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.LatchTable;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.Loggable;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;
/**
* Tree implements the JE B+Tree.
*
* A note on tree search patterns:
* There's a set of Tree.search* methods. Some clients of the tree use
* those search methods directly, whereas other clients of the tree
* tend to use methods built on top of search.
*
* The semantics of search* are
* they leave you pointing at a BIN or IN
* they don't tell you where the reference of interest is.
* The semantics of the get* methods are:
* they leave you pointing at a BIN or IN
* they return the index of the slot of interest
* they traverse down to whatever level is needed
* they are built on top of search* methods.
* For the future:
* Over time, we need to clarify which methods are to be used by clients
* of the tree. Preferably clients that call the tree use get*, although
* their are cases where they need visibility into the tree structure.
*
* Also, search* should return the location of the slot to save us a
* second binary search.
*
* Search Method Call Hierarchy
* ----------------------------
* getFirst/LastNode
* search
* CALLED BY:
* CursorImpl.getFirstOrLast
*
* getNext/PrevBin
* getParentINForChildIN
* searchSubTree
* CALLED BY:
* DupConvert
* CursorImpl.getNext
*
* getParentINForChildIN
* IN.findParent
* does not use shared latching
* CALLED BY:
* Checkpointer.flushIN (doFetch=false, targetLevel=-1)
* FileProcessor.processIN (doFetch=true, targetLevel=LEVEL)
* Evictor.evictIN (doFetch=true, targetLevel=-1)
* RecoveryManager.replaceOrInsertChild (doFetch=true, targetLevel=-1)
* getNext/PrevBin (doFetch=true, targetLevel=-1)
*
* search
* searchSubTree
* CALLED BY:
* CursorImpl.searchAndPosition
* INCompressor to find BIN
*
* searchSubTree
* uses shared grandparent latching
*
* getParentBINForChildLN
* searchSplitsAllowed
* CALLED BY:
* RecoveryManager.redo
* RecoveryManager.recoveryUndo
* search
* CALLED BY:
* RecoveryManager.abortUndo
* RecoveryManager.rollbackUndo
* FileProcessor.processLN
* Cleaner.processPendingLN
* UtilizationProfile.verifyLsnIsObsolete (utility)
*
* findBinForInsert
* searchSplitsAllowed
* CALLED BY:
* CursorImpl.putInternal
*
* searchSplitsAllowed
* uses shared non-grandparent latching
* CALLED BY:
* DupConvert (instead of findBinForInsert, which needs a cursor)
*
* Possible Shared Latching Improvements
* -------------------------------------
* By implementing shared latching for BINs we would get better concurrency in
* these cases:
* Reads when LN is in cache, or LN is not needed (key-only op, e.g., dups)
*/
public final class Tree implements Loggable {
/* For debug tracing */
private static final String TRACE_ROOT_SPLIT = "RootSplit:";
private DatabaseImpl database;
private int maxTreeEntriesPerNode;
private ChildReference root;
/*
* Latch that must be held when using/accessing the root node. Protects
* against the root being changed out from underneath us by splitRoot.
* After the root IN is latched, the rootLatch can be released.
*/
private SharedLatch rootLatch;
/*
* We don't need the stack trace on this so always throw a static and
* avoid the cost of Throwable.fillInStack() every time it's thrown.
* [#13354].
*/
private static SplitRequiredException splitRequiredException =
new SplitRequiredException();
/* Stats */
private StatGroup stats;
private final ThreadLocal<TreeWalkerStatsAccumulator> treeStatsAccumulatorTL =
new ThreadLocal<TreeWalkerStatsAccumulator>();
/* For unit tests */
private TestHook waitHook; // used for generating race conditions
private TestHook searchHook; // [#12736]
private TestHook ckptHook; // [#13897]
private TestHook getParentINHook;
private TestHook fetchINHook;
/**
* Embodies an enum for the type of search being performed. NORMAL means
* do a regular search down the tree. LEFT/RIGHT means search down the
* left/right side to find the first/last node in the tree.
*/
public static class SearchType {
/* Search types */
public static final SearchType NORMAL = new SearchType();
public static final SearchType LEFT = new SearchType();
public static final SearchType RIGHT = new SearchType();
/* No lock types can be defined outside this class. */
private SearchType() {
}
}
/*
* Class that overrides ChildReference methods to enforce rules that apply
* to the root.
*
* Overrides fetchTarget() so that if the rootLatch is not held exclusively
* when the root is fetched, we upgrade it to exclusive. Also overrides
* setter methods to assert that an exclusive latch is held.
*
* Overrides setDirty to dirty the DatabaseImpl, so that the MapLN will be
* logged during the next checkpoint. This is critical when updating the
* root LSN.
*/
private class RootChildReference extends ChildReference {
private RootChildReference() {
super();
}
private RootChildReference(Node target, byte[] key, long lsn) {
super(target, key, lsn);
}
/* Caller is responsible for releasing rootLatch. */
@Override
public Node fetchTarget(DatabaseImpl database, IN in)
throws DatabaseException {
if (getTarget() == null && !rootLatch.isExclusiveOwner()) {
rootLatch.release();
rootLatch.acquireExclusive();
/*
* If the root field changed while unlatched then we have an
* invalid state and cannot continue. [#21686]
*/
if (this != root) {
throw EnvironmentFailureException.unexpectedState(
database.getEnv(),
"Root changed while unlatched, dbId=" +
database.getId());
}
}
return super.fetchTarget(database, in);
}
@Override
public void setTarget(Node target) {
assert rootLatch.isExclusiveOwner();
super.setTarget(target);
}
@Override
public void clearTarget() {
assert rootLatch.isExclusiveOwner();
super.clearTarget();
}
@Override
public void setLsn(long lsn) {
assert rootLatch.isExclusiveOwner();
super.setLsn(lsn);
}
@Override
void updateLsnAfterOptionalLog(DatabaseImpl dbImpl, long lsn) {
assert rootLatch.isExclusiveOwner();
super.updateLsnAfterOptionalLog(dbImpl, lsn);
}
@Override
void setDirty() {
super.setDirty();
database.setDirty();
}
}
/**
* Create a new tree.
*/
public Tree(DatabaseImpl database) {
init(database);
setDatabase(database);
}
/**
* Create a tree that's being read in from the log.
*/
public Tree() {
init(null);
maxTreeEntriesPerNode = 0;
}
/**
* constructor helper
*/
private void init(DatabaseImpl database) {
this.root = null;
this.database = database;
}
/**
* Set the database for this tree. Used by recovery when recreating an
* existing tree.
*/
public void setDatabase(DatabaseImpl database) {
this.database = database;
final EnvironmentImpl envImpl = database.getEnv();
/*
* The LatchContext for the root is special in that it is considered a
* Btree latch (the Btree latch table is used), but the context is not
* implemented by the IN class.
*/
final LatchContext latchContext = new LatchContext() {
@Override
public int getLatchTimeoutMs() {
return envImpl.getLatchTimeoutMs();
}
@Override
public String getLatchName() {
return "RootLatch";
}
@Override
public LatchTable getLatchTable() {
return LatchSupport.btreeLatchTable;
}
@Override
public EnvironmentImpl getEnvImplForFatalException() {
return envImpl;
}
};
rootLatch = LatchFactory.createSharedLatch(
latchContext, false /*exclusiveOnly*/);
maxTreeEntriesPerNode = database.getNodeMaxTreeEntries();
}
/**
* @return the database for this Tree.
*/
public DatabaseImpl getDatabase() {
return database;
}
/**
* Called when latching a child and the parent is latched. Used to
* opportunistically validate the parent pointer.
*/
private static void latchChild(final IN parent,
final IN child,
final CacheMode cacheMode) {
child.latch(cacheMode);
if (child.getParent() != parent) {
throw EnvironmentFailureException.unexpectedState();
}
}
/**
* Called when latching a child and the parent is latched. Used to
* opportunistically validate the parent pointer.
*/
private static void latchChildShared(final IN parent,
final IN child,
final CacheMode cacheMode) {
child.latchShared(cacheMode);
if (child.getParent() != parent) {
throw EnvironmentFailureException.unexpectedState();
}
}
public void latchRootLatchExclusive()
throws DatabaseException {
rootLatch.acquireExclusive();
}
public void releaseRootLatch()
throws DatabaseException {
rootLatch.release();
}
/**
* Set the root for the tree. Should only be called within the root latch.
*/
public void setRoot(ChildReference newRoot, boolean notLatched) {
assert (notLatched || rootLatch.isExclusiveOwner());
root = newRoot;
}
public ChildReference makeRootChildReference(
Node target,
byte[] key,
long lsn) {
return new RootChildReference(target, key, lsn);
}
private ChildReference makeRootChildReference() {
return new RootChildReference();
}
/*
* A tree doesn't have a root if (a) the root field is null, or (b) the
* root is non-null, but has neither a valid target nor a valid LSN. Case
* (b) can happen if the database is or was previously opened in deferred
* write mode.
*
* @return false if there is no real root.
*/
public boolean rootExists() {
if (root == null) {
return false;
}
if ((root.getTarget() == null) &&
(root.getLsn() == DbLsn.NULL_LSN)) {
return false;
}
return true;
}
/**
* Perform a fast check to see if the root IN is resident. No latching is
* performed. To ensure that the root IN is not loaded by another thread,
* this method should be called while holding a write lock on the MapLN.
* That will prevent opening the DB in another thread, and potentially
* loading the root IN. [#13415]
*/
public boolean isRootResident() {
return root != null && root.getTarget() != null;
}
/**
* Helper to obtain the root IN with shared root latching. Optionally
* updates the generation of the root when latching it.
*/
public IN getRootIN(CacheMode cacheMode)
throws DatabaseException {
return getRootINInternal(cacheMode, false/*exclusive*/);
}
/**
* Helper to obtain the root IN with exclusive root latching. Optionally
* updates the generation of the root when latching it.
*/
public IN getRootINLatchedExclusive(CacheMode cacheMode)
throws DatabaseException {
return getRootINInternal(cacheMode, true/*exclusive*/);
}
private IN getRootINInternal(CacheMode cacheMode, boolean exclusive)
throws DatabaseException {
rootLatch.acquireShared();
try {
return getRootINRootAlreadyLatched(cacheMode, exclusive);
} finally {
rootLatch.release();
}
}
/**
* Helper to obtain the root IN, when the root latch is already held.
*/
public IN getRootINRootAlreadyLatched(
CacheMode cacheMode,
boolean exclusive) {
if (!rootExists()) {
return null;
}
final IN rootIN = (IN) root.fetchTarget(database, null);
if (exclusive) {
rootIN.latch(cacheMode);
} else {
rootIN.latchShared(cacheMode);
}
return rootIN;
}
public IN getResidentRootIN(boolean latched)
throws DatabaseException {
IN rootIN = null;
if (rootExists()) {
rootIN = (IN) root.getTarget();
if (rootIN != null && latched) {
rootIN.latchShared(CacheMode.UNCHANGED);
}
}
return rootIN;
}
public IN withRootLatchedExclusive(WithRootLatched wrl)
throws DatabaseException {
try {
rootLatch.acquireExclusive();
return wrl.doWork(root);
} finally {
rootLatch.release();
}
}
public IN withRootLatchedShared(WithRootLatched wrl)
throws DatabaseException {
try {
rootLatch.acquireShared();
return wrl.doWork(root);
} finally {
rootLatch.release();
}
}
/**
* Get LSN of the rootIN. Obtained without latching, should only be
* accessed while quiescent.
*/
public long getRootLsn() {
if (root == null) {
return DbLsn.NULL_LSN;
} else {
return root.getLsn();
}
}
/**
* Cheaply calculates and returns the maximum possible number of LNs in the
* btree.
*/
public long getMaxLNs() {
final int levels;
final int topLevelSlots;
rootLatch.acquireShared();
try {
IN rootIN = (IN) root.fetchTarget(database, null);
levels = rootIN.getLevel() & IN.LEVEL_MASK;
topLevelSlots = rootIN.getNEntries();
} finally {
rootLatch.release();
}
return (long) (topLevelSlots *
Math.pow(database.getNodeMaxTreeEntries(), levels - 1));
}
/**
* Deletes a BIN specified by key from the tree. If the BIN resides in a
* subtree that can be pruned away, prune as much as possible, so we
* don't leave a branch that has no BINs.
*
* It's possible that the targeted BIN will now have entries, or will
* have resident cursors. Either will prevent deletion (see exceptions).
*
* Unlike splits, IN deletion does not immediately log the subtree parent
* or its ancestors. It is sufficient to simply dirty the subtree parent.
* Logging is not necessary for correctness, and if a checkpoint does not
* flush the subtree parent then recovery will add the BINs to the
* compressor queue when redoing the LN deletions.
*
* @param idKey - the identifier key of the node to delete.
*
* @throws NodeNotEmptyException if the BIN is not empty. The deletion is
* no longer possible.
*
* @throws CursorsExistException is the BIN has cursors. The deletion
* should be retried later by the INCompressor.
*/
public void delete(byte[] idKey)
throws NodeNotEmptyException, CursorsExistException {
final EnvironmentImpl envImpl = database.getEnv();
final Logger logger = envImpl.getLogger();
final List<SplitInfo> nodeLadder = searchDeletableSubTree(idKey);
if (nodeLadder == null) {
/*
* The tree is empty, so do nothing. Root compression is no
* longer supported. Root compression has no impact on memory
* usage now that we evict the root IN. It reduces log space
* taken by INs for empty (but not removed) databases, yet
* requires logging a INDelete and MapLN; this provides very
* little benefit, if any. Because it requires extensive
* testing (which has not been done), this minor benefit is not
* worth the cost. And by removing it we no longer log
* INDelete, which reduces complexity going forward. [#17546]
*/
return;
}
/* Detach this subtree. */
final SplitInfo detachPoint = nodeLadder.get(0);
try {
final IN branchParent = detachPoint.parent;
final IN branchRoot = detachPoint.child;
if (logger.isLoggable(Level.FINEST)) {
LoggerUtils.envLogMsg(
Level.FINEST, envImpl,
"Tree.delete() " +
Thread.currentThread().getId() + "-" +
Thread.currentThread().getName() + "-" +
envImpl.getName() +
" Deleting child node: " + branchRoot.getNodeId() +
" from parent node: " + branchParent.getNodeId() +
" parent has " + branchParent.getNEntries() +
" children");
}
branchParent.deleteEntry(detachPoint.index);
/*
* Remove deleted INs from the INList/cache and count them as
* provisionally obsolete. The parent is not logged immediately, so
* we can't count them immediately obsolete. They will be counted
* obsolete when an ancestor is logged non-provisionally. [#21348]
*/
final INList inList = database.getEnv().getInMemoryINs();
for (final SplitInfo info : nodeLadder) {
final IN child = info.child;
assert !child.isBINDelta(false);
assert !(child.isUpperIN() && child.getNEntries() > 1);
assert !(child.isBIN() && child.getNEntries() > 0);
/*
* Remove child from cache. The branch root was removed by
* deleteEntry above.
*/
if (child != branchRoot) {
inList.remove(child);
}
/* Count full and delta versions as obsolete. */
branchParent.trackProvisionalObsolete(
child, child.getLastFullLsn());
branchParent.trackProvisionalObsolete(
child, child.getLastDeltaLsn());
}
if (logger.isLoggable(Level.FINE)) {
LoggerUtils.envLogMsg(
Level.FINE, envImpl,
"SubtreeRemoval: subtreeRoot = " + branchRoot.getNodeId());
}
} finally {
releaseNodeLadderLatches(nodeLadder);
}
}
/**
* Search down the tree using a key, but instead of returning the BIN that
* houses that key, find the point where we can detach a deletable
* subtree. A deletable subtree is a branch where each IN has one child,
* and the bottom BIN has no entries and no resident cursors. That point
* can be found by saving a pointer to the lowest node in the path with
* more than one entry.
*
* INa
* / \
* INb INc
* | |
* INd ..
* / \
* INe ..
* |
* BINx (suspected of being empty)
*
* In this case, we'd like to prune off the subtree headed by INe. INd
* is the parent of this deletable subtree.
*
* The method returns a list of parent/child/index structures. In this
* example, the list will hold:
* INd/INe/index
* INe/BINx/index
* All three nodes will be EX-latched.
*
* @return null if the entire Btree is empty, or a list of SplitInfo for
* the branch to be deleted. If non-null is returned, the INs in the list
* will be EX-latched; otherwise, no INs will be latched.
*
* @throws NodeNotEmptyException if the BIN is not empty.
*
* @throws CursorsExistException is the BIN has cursors.
*/
private List<SplitInfo> searchDeletableSubTree(byte[] key)
throws NodeNotEmptyException, CursorsExistException {
assert (key!= null);
IN parent = getRootINLatchedExclusive(CacheMode.UNCHANGED);
if (parent == null) {
/* Tree was never persisted. */
return null;
}
final ArrayList<SplitInfo> nodeLadder = new ArrayList<>();
try {
IN child;
IN pinedIN = null;
do {
if (parent.getNEntries() == 0) {
throw EnvironmentFailureException.unexpectedState(
"Found upper IN with 0 entries");
}
if (parent.getNEntries() > 1) {
/*
* A node with more than one entry is the lowest potential
* branch parent. Unlatch/discard ancestors of this parent.
*/
for (final SplitInfo info : nodeLadder) {
info.parent.releaseLatch();
}
nodeLadder.clear();
pinedIN = null;
} else if (parent.isPinned()) {
pinedIN = parent;
}
final int index = parent.findEntry(key, false, false);
assert index >= 0;
/* Get the child node that matches. */
child = parent.fetchIN(index, CacheMode.UNCHANGED);
latchChild(parent, child, CacheMode.UNCHANGED);
nodeLadder.add(new SplitInfo(parent, child, index));
/* Continue down a level */
parent = child;
} while (!parent.isBIN());
if (pinedIN != null) {
throw CursorsExistException.CURSORS_EXIST;
}
/*
* See if there is a reason we can't delete this BIN -- i.e.
* new items have been inserted, or a cursor exists on it.
*/
assert (child.isBIN());
final BIN bin = (BIN) child;
if (bin.getNEntries() != 0) {
throw NodeNotEmptyException.NODE_NOT_EMPTY;
}
if (bin.isBINDelta()) {
throw EnvironmentFailureException.unexpectedState(
"Found BIN delta with 0 entries");
}
/*
* This case can happen if we are keeping a cursor on an empty
* BIN as we traverse.
*/
if (bin.nCursors() > 0 || child.isPinned()) {
throw CursorsExistException.CURSORS_EXIST;
}
if (nodeLadder.get(0).parent.getNEntries() <= 1) {
/* The entire tree is empty. */
releaseNodeLadderLatches(nodeLadder);
return null;
}
return nodeLadder;
} catch (Throwable e) {
releaseNodeLadderLatches(nodeLadder);
/* Release parent in case it was not added to nodeLadder. */
parent.releaseLatchIfOwner();
throw e;
}
}
/**
* Release latched acquired by searchDeletableSubTree. Each child is
* latched, plus the parent of the first node (the branch parent).
*/
private void releaseNodeLadderLatches(List<SplitInfo> nodeLadder)
throws DatabaseException {
if (nodeLadder.isEmpty()) {
return;
}
nodeLadder.get(0).parent.releaseLatch();
for (final SplitInfo info : nodeLadder) {
info.child.releaseLatch();
}
nodeLadder.clear();
}
/**
* Find the leftmost node (IN or BIN) in the tree.
*
* @return the leftmost node in the tree, null if the tree is empty. The
* returned node is latched and the caller must release it.
*/
public BIN getFirstNode(CacheMode cacheMode)
throws DatabaseException {
BIN bin = search(
null /*key*/, SearchType.LEFT, null /*binBoundary*/,
cacheMode, null /*comparator*/);
if (bin != null) {
bin.mutateToFullBIN(false /*leaveFreeSlot*/);
}
return bin;
}
/**
* Find the rightmost node (IN or BIN) in the tree.
*
* @return the rightmost node in the tree, null if the tree is empty. The
* returned node is latched and the caller must release it.
*/
public BIN getLastNode(CacheMode cacheMode)
throws DatabaseException {
BIN bin = search(
null /*key*/, SearchType.RIGHT, null /*binBoundary*/,
cacheMode, null /*comparator*/);
if (bin != null) {
bin.mutateToFullBIN(false /*leaveFreeSlot*/);
}
return bin;
}
/**
* Return a reference to the adjacent BIN.
*
* @param bin The BIN to find the next BIN for. This BIN is latched.
*
* @return The next BIN, or null if there are no more. The returned node
* is latched and the caller must release it. If null is returned, the
* argument BIN remains latched.
*/
public BIN getNextBin(BIN bin, CacheMode cacheMode)
throws DatabaseException {
return (BIN) getNextIN(bin, true, false, cacheMode);
}
/**
* Return a reference to the previous BIN.
*
* @param bin The BIN to find the next BIN for. This BIN is latched.
*
* @return The previous BIN, or null if there are no more. The returned
* node is latched and the caller must release it. If null is returned,
* the argument bin remains latched.
*/
public BIN getPrevBin(BIN bin, CacheMode cacheMode)
throws DatabaseException {
return (BIN) getNextIN(bin, false, false, cacheMode);
}
/**
* Returns the next IN in the tree before/after the given IN, and at the
* same level. For example, if a BIN is passed in the prevIn parameter,
* the next BIN will be returned.
*
* TODO: A possible problem with this method is that we don't know for
* certain whether it works properly in the face of splits. There are
* comments below indicating it does. But the Cursor.checkForInsertion
* method was apparently added because getNextBin/getPrevBin didn't work
* properly, and may skip a BIN. So at least it didn't work properly in
* the distant past. Archeology and possibly testing are needed to find
* the truth. Hopefully it does now work, and Cursor.checkForInsertion can
* be removed.
*
* TODO: To eliminate EX latches on upper INs, a new getParentINForChildIN
* is needed, which will return with both the parent and the grandparent
* SH-latched. If we do this, then in Tree.getNextIN() the call to
* searchSubtree() will be able to do grandparent latching, and the call
* to parent.fetchIN(index) will also be replace with a local version of
* grandparent latching.
*/
public IN getNextIN(
IN prevIn,
boolean forward,
boolean latchShared,
CacheMode cacheMode) {
assert(prevIn.isLatchOwner());
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(1);
}
prevIn.mutateToFullBIN(false /*leaveFreeSlot*/);
/*
* Use the right most key (for a forward progressing cursor) or the
* left most key (for a backward progressing cursor) as the search key.
* The reason is that the IN may get split while finding the next IN so
* it's not safe to take the IN's identifierKey entry. If the IN gets
* split, then the right (left) most key will still be on the
* resultant node. The exception to this is that if there are no
* entries, we just use the identifier key.
*/
final byte[] searchKey;
if (prevIn.getNEntries() == 0) {
searchKey = prevIn.getIdentifierKey();
} else if (forward) {
searchKey = prevIn.getKey(prevIn.getNEntries() - 1);
} else {
searchKey = prevIn.getKey(0);
}
final int targetLevel = prevIn.getLevel();
IN curr = prevIn;
boolean currIsLatched = false;
IN parent = null;
IN nextIN = null;
boolean nextINIsLatched = false;
boolean normalExit = false;
/*
* Ascend the tree until we find a level that still has nodes to the
* right (or left if !forward) of the path that we're on. If we reach
* the root level, we're done.
*/
try {
while (true) {
/*
* Move up a level from where we are now and check to see if we
* reached the top of the tree.
*/
currIsLatched = false;
if (curr.isRoot()) {
/* We've reached the root of the tree. */
curr.releaseLatch();
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(0);
}
normalExit = true;
return null;
}
final SearchResult result = getParentINForChildIN(
curr, false, /*useTargetLevel*/
true, /*doFetch*/ cacheMode);
if (result.exactParentFound) {
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(1);
}
parent = result.parent;
} else {
throw EnvironmentFailureException.unexpectedState(
"Failed to find parent for IN");
}
/*
* Figure out which entry we are in the parent. Add (subtract)
* 1 to move to the next (previous) one and check that we're
* still pointing to a valid child. Don't just use the result
* of the parent.findEntry call in getParentNode, because we
* want to use our explicitly chosen search key.
*/
int index = parent.findEntry(searchKey, false, false);
final boolean moreEntriesThisIn;
if (forward) {
index++;
moreEntriesThisIn = (index < parent.getNEntries());
} else {
moreEntriesThisIn = (index > 0);
index--;
}
if (moreEntriesThisIn) {
/*
* There are more entries to the right of the current path
* in parent. Get the entry, and then descend down the
* left most path to an IN.
*/
nextIN = parent.fetchIN(index, cacheMode);
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(1);
}
if (nextIN.getLevel() == targetLevel) {
if (latchShared) {
latchChildShared(parent, nextIN, cacheMode);
} else {
latchChild(parent, nextIN, cacheMode);
}
nextINIsLatched = true;
nextIN.mutateToFullBIN(false /*leaveFreeSlot*/);
parent.releaseLatch();
parent = null; // to avoid falsely unlatching parent
final TreeWalkerStatsAccumulator treeStatsAccumulator =
getTreeStatsAccumulator();
if (treeStatsAccumulator != null) {
nextIN.accumulateStats(treeStatsAccumulator);
}
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(1);
}
normalExit = true;
return nextIN;
} else {
/*
* We landed at a higher level than the target level.
* Descend down to the appropriate level.
*/
assert(nextIN.isUpperIN());
nextIN.latch(cacheMode);
nextINIsLatched = true;
parent.releaseLatch();
parent = null; // to avoid falsely unlatching parent
nextINIsLatched = false;
final IN ret = searchSubTree(
nextIN, null, /*key*/
(forward ? SearchType.LEFT : SearchType.RIGHT),
targetLevel, latchShared, cacheMode,
null /*comparator*/);
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(1);
}
if (ret.getLevel() == targetLevel) {
normalExit = true;
return ret;
} else {
throw EnvironmentFailureException.unexpectedState(
"subtree did not have a IN at level " +
targetLevel);
}
}
}
/* Nothing at this level. Ascend to a higher level. */
curr = parent;
currIsLatched = true;
parent = null; // to avoid falsely unlatching parent below
}
} finally {
if (!normalExit) {
if (curr != null && currIsLatched) {
curr.releaseLatch();
}
if (parent != null) {
parent.releaseLatch();
}
if (nextIN != null && nextINIsLatched) {
nextIN.releaseLatch();
}
}
}
}
/**
* Search for the parent P of a given IN C (where C is viewed as a logical
* node; not as a java obj). If found, P is returned latched exclusively.
* The method is used when C has been accessed "directly", i.e., not via a
* tree search, and we need to perform an operation on C that requires an
* update to its parent. Such situations arise during eviction (C has been
* accessed via the LRU list), checkpointing, and recovery (C has been read
* from the log and is not attached to the in-memory tree).
*
* The method uses C's identifierKey to search down the tree until:
*
* (a) doFetch is false and we need to access a node that is not cached.
* In this case, we are actually looking for the cached copies of both C
* and its parent, so a cache miss on the path to C is considered a
* failure. This search mode is used by the evictor: to evict C (which has
* been retrieved from the LRU), its parent must be found and EX-latched;
* however, if the C has been evicted already by another thread, there is
* nothing to do (C will GC-ed).
* or
* (b) We reach a node whose node id equals the C's node id. In this case,
* we know for sure that C still belongs to the BTree and its parent has
* been found.
* or
* (c) useTargetLevel is true and we reach a node P that is at one level
* above C's level. We know that P contains a slot S whose corresponding
* key range includes C's identifierKey. Since we haven't read the child
* node under S to check its node id, we cannot know for sure that C is
* still in the tree. Nevertheless, we consider this situation a success,
* i.e., P is the parent node we are looking for. In this search mode,
* after this method returns, the caller is expected to take further
* action based on the info in slot S. For example, if C was created
* by reading a log entry at LSN L, and the LSN at slot S is also L, then
* we know P is the real parent (and we have thus saved a possible extra
* I/O to refetch the C node from the log to check its node id). This
* search mode is used by the cleaner.
* or
* (d) None of the above conditions occur and the bottom of the BTree is
* reached. In this case, no parent exists (the child node is an old
* version of a node that has been removed from the BTree).
*
* @param child The child node for which to find the parent. This node is
* latched by the caller and is unlatched by this function before returning
* to the caller.
*
* @param useTargetLevel If true, the search is considered successful if
* a node P is reached at one level above C's level. P is the parent to
* return to the caller.
*
* @param doFetch if false, stop the search if we run into a non-resident
* child and assume that no parent exists.
*
* @param cacheMode The CacheMode for affecting the hotness of the nodes
* visited during the search.
*
* @return a SearchResult object. If the parent has been found,
* result.foundExactMatch is true, result.parent refers to that node, and
* result.index is the slot for the child IN inside the parent IN.
* Otherwise, result.foundExactMatch is false and result.parent is null.
*/
public SearchResult getParentINForChildIN(
IN child,
boolean useTargetLevel,
boolean doFetch,
CacheMode cacheMode)
throws DatabaseException {
return getParentINForChildIN(
child, useTargetLevel, doFetch,
cacheMode, null /*trackingList*/);
}
/**
* This version of getParentINForChildIN does the same thing as the version
* above, but also adds a "trackingList" param. If trackingList is not
* null, the LSNs of the parents visited along the way are added to the
* list, as a debug tracing mechanism. This is meant to stay in production,
* to add information to the log.
*/
public SearchResult getParentINForChildIN(
IN child,
boolean useTargetLevel,
boolean doFetch,
CacheMode cacheMode,
List<TrackingInfo> trackingList)
throws DatabaseException {
/* Sanity checks */
if (child == null) {
throw EnvironmentFailureException.unexpectedState(
"getParentINForChildIN given null child node");
}
assert child.isLatchOwner();
/*
* Get information from child before releasing latch.
*/
long targetId = child.getNodeId();
byte[] targetKey = child.getIdentifierKey();
int targetLevel = (useTargetLevel ? child.getLevel() : -1);
int exclusiveLevel = child.getLevel() + 1;
boolean requireExactMatch = true;
child.releaseLatch();
return getParentINForChildIN(
targetId, targetKey, targetLevel,
exclusiveLevel, requireExactMatch, doFetch,
cacheMode, trackingList);
}
/**
* This version of getParentINForChildIN() is the actual implementation
* of the previous 2 versions (read the comments there), but it also
* implements one additional use cases via the extra "requireExactMatch"
* param.
*
* {@literal requireExactMatch == false && doFetch == false}
* In this case we are actually looking for the lowest cached ancestor
* of the C node. The method will always return a node (considered as the
* "parent") unless the BTree is empty (has no nodes at all). The returned
* node must be latched, but not necessarily in EX mode. This search mode
* is used by the checkpointer.
*
* The exclusiveLevel param:
* In general, if exclusiveLevel == L, nodes above L will be SH latched and
* nodes at or below L will be EX-latched. In all current use cases, L is
* set to 1 + C.level. Note that if doFetch == false, the normalized
* exclusiveLevel must be {@literal >=} 2 so that loadIN can be called.
*/
public SearchResult getParentINForChildIN(
long targetNodeId,
byte[] targetKey,
int targetLevel,
int exclusiveLevel,
boolean requireExactMatch,
boolean doFetch,
CacheMode cacheMode,
List<TrackingInfo> trackingList)
throws DatabaseException {
/* Call hook before latching. No latches are held. */
TestHookExecute.doHookIfSet(getParentINHook);
assert doFetch || (exclusiveLevel & IN.LEVEL_MASK) >= 2;
/*
* SearchResult is initialized as follows:
* exactParentFound = false;
* parent = null; index = -1; childNotResident = false;
*/
SearchResult result = new SearchResult();
/* Get the tree root, SH-latched. */
IN rootIN = getRootIN(cacheMode);
if (rootIN == null) {
return result;
}
/* If the root is the target node, there is no parent */
assert(rootIN.getNodeId() != targetNodeId);
assert(rootIN.getLevel() >= exclusiveLevel) :
" rootLevel=" + rootIN.getLevel() +
" exLevel=" + exclusiveLevel;
IN parent = rootIN;
IN child = null;
boolean success = false;
try {
if (rootIN.getLevel() <= exclusiveLevel) {
rootIN.releaseLatch();
rootIN = getRootINLatchedExclusive(cacheMode);
assert(rootIN != null);
parent = rootIN;
}
while (true) {
assert(parent.getNEntries() > 0);
result.index = parent.findEntry(targetKey, false, false);
if (trackingList != null) {
trackingList.add(new TrackingInfo(
parent.getLsn(result.index), parent.getNodeId(),
parent.getNEntries(), result.index));
}
assert TestHookExecute.doHookIfSet(searchHook);
if (targetLevel > 0 && parent.getLevel() == targetLevel + 1) {
result.exactParentFound = true;
result.parent = parent;
break;
}
if (doFetch) {
child = parent.fetchINWithNoLatch(result, targetKey);
if (child == null) {
if (trackingList != null) {
trackingList.clear();
}
result.reset();
TestHookExecute.doHookIfSet(fetchINHook, child);
rootIN = getRootIN(cacheMode);
assert(rootIN != null);
if (rootIN.getLevel() <= exclusiveLevel) {
rootIN.releaseLatch();
rootIN = getRootINLatchedExclusive(cacheMode);
assert(rootIN != null);
}
parent = rootIN;
continue;
}
} else {
/*
* We can only call loadIN if we have an EX-latch on the
* parent. However, calling loadIN is only necessary when
* the parent is at level 2, since UINs are not cached
* off-heap, and exclusiveLevel is currently always >= 2.
*/
if (parent.getNormalizedLevel() == 2) {
child = parent.loadIN(result.index, cacheMode);
} else {
child = (IN) parent.getTarget(result.index);
}
}
assert(child != null || !doFetch);
if (child == null) {
if (requireExactMatch) {
parent.releaseLatch();
} else {
result.parent = parent;
}
result.childNotResident = true;
break;
}
if (child.getNodeId() == targetNodeId) {
result.exactParentFound = true;
result.parent = parent;
break;
}
if (child.isBIN()) {
if (requireExactMatch) {
parent.releaseLatch();
} else {
result.parent = parent;
}
break;
}
/* We can search further down the tree. */
if (child.getLevel() <= exclusiveLevel) {
latchChild(parent, child, cacheMode);
} else {
latchChildShared(parent, child, cacheMode);
}
parent.releaseLatch();
parent = child;
child = null;
}
success = true;
} finally {
if (!success) {
if (parent.isLatchOwner()) {
parent.releaseLatch();
}
if (child != null && child.isLatchOwner()) {
child.releaseLatch();
}
}
}
if (result.parent != null) {
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(1);
}
assert((!doFetch && !requireExactMatch) ||
result.parent.isLatchOwner());
}
return result;
}
/**
* Return a reference to the parent of this LN. This searches through the
* tree and allows splits, if the splitsAllowed param is true. Set the
* tree location to the proper BIN parent whether or not the LN child is
* found. That's because if the LN is not found, recovery or abort will
* need to place it within the tree, and so we must point at the
* appropriate position.
*
* <p>When this method returns with location.bin non-null, the BIN is
* latched and must be unlatched by the caller. Note that location.bin may
* be non-null even if this method returns false.</p>
*
* @param location a holder class to hold state about the location
* of our search. Sort of an internal cursor.
*
* @param key key to navigate through main key
*
* @param splitsAllowed true if this method is allowed to cause tree splits
* as a side effect. In practice, recovery can cause splits, but abort
* can't.
*
* @param blindDeltaOps Normally, if this method lands on a BIN-delta and
* the search key is not in that delta, it will mutate the delta to a full
* BIN to make sure whether the search key exists in the tree or not.
* However, by passing true for blindDeltaOps, the caller indicates that
* it doesn't really care whether the key is in the tree or not: it is
* going to insert the key in the BIN-delta, if not already there,
* essentially overwritting the slot that may exist in the full BIN. So,
* if blindDeltaOps is true, the method will not mutate a BIN-delta parent
* (unless the BIN-delta has no space for a slot insertion).
*
* @param cacheMode The CacheMode for affecting the hotness of the tree.
*
* @return true if node found in tree.
* If false is returned and there is the possibility that we can insert
* the record into a plausible parent we must also set
* - location.bin (may be null if no possible parent found)
* - location.lnKey (don't need to set if no possible parent).
*/
public boolean getParentBINForChildLN(
TreeLocation location,
byte[] key,
boolean splitsAllowed,
boolean blindDeltaOps,
CacheMode cacheMode)
throws DatabaseException {
/*
* Find the BIN that either points to this LN or could be its
* ancestor.
*/
location.reset();
BIN bin;
int index;
if (splitsAllowed) {
bin = searchSplitsAllowed(key, cacheMode, null /*comparator*/);
} else {
bin = search(key, cacheMode);
}
if (bin == null) {
return false;
}
try {
while (true) {
location.bin = bin;
index = bin.findEntry(
key, true /*indicateIfExact*/, false /*exactSearch*/);
boolean match = (index >= 0 &&
(index & IN.EXACT_MATCH) != 0);
index &= ~IN.EXACT_MATCH;
location.index = index;
location.lnKey = key;
/*
if (!match && bin.isBINDelta() && blindDeltaOps) {
System.out.println(
"Blind op on BIN-delta : " + bin.getNodeId() +
" nEntries = " +
bin.getNEntries() +
" max entries = " +
bin.getMaxEntries() +
" full BIN entries = " +
bin.getFullBinNEntries() +
" full BIN max entries = " +
bin.getFullBinMaxEntries());
}
*/
if (match) {
location.childLsn = bin.getLsn(index);
location.childLoggedSize = bin.getLastLoggedSize(index);
location.isKD = bin.isEntryKnownDeleted(index);
location.isEmbedded = bin.isEmbeddedLN(index);
return true;
} else {
if (bin.isBINDelta() &&
(!blindDeltaOps ||
bin.getNEntries() >= bin.getMaxEntries())) {
bin.mutateToFullBIN(splitsAllowed /*leaveFreeSlot*/);
location.reset();
continue;
}
return false;
}
}
} catch (RuntimeException e) {
bin.releaseLatch();
location.bin = null;
throw e;
}
}
/**
* Find the BIN that is relevant to the insert. If the tree doesn't exist
* yet, then create the first IN and BIN. On return, the cursor is set to
* the BIN that is found or created, and the BIN is latched.
*/
public BIN findBinForInsert(final byte[] key, final CacheMode cacheMode) {
boolean rootLatchIsHeld = false;
BIN bin = null;
try {
long logLsn;
/*
* We may have to try several times because of a small
* timing window, explained below.
*/
while (true) {
rootLatchIsHeld = true;
rootLatch.acquireShared();
if (!rootExists()) {
rootLatch.release();
rootLatch.acquireExclusive();
if (rootExists()) {
rootLatch.release();
rootLatchIsHeld = false;
continue;
}
final EnvironmentImpl env = database.getEnv();
final INList inMemoryINs = env.getInMemoryINs();
/*
* This is an empty tree, either because it's brand new
* tree or because everything in it was deleted. Create an
* IN and a BIN. We could latch the rootIN here, but
* there's no reason to since we're just creating the
* initial tree and we have the rootLatch held. Remember
* that referred-to children must be logged before any
* references to their LSNs.
*/
IN rootIN =
new IN(database, key, maxTreeEntriesPerNode, 2);
rootIN.setIsRoot(true);
rootIN.latch(cacheMode);
/* First BIN in the tree, log provisionally right away. */
bin = new BIN(database, key, maxTreeEntriesPerNode, 1);
bin.latch(cacheMode);
logLsn = bin.optionalLogProvisionalNoCompress(rootIN);
/*
* Log the root right away. Leave the root dirty, because
* the MapLN is not being updated, and we want to avoid
* this scenario from [#13897], where the LN has no
* possible parent.
* provisional BIN
* root IN
* checkpoint start
* LN is logged
* checkpoint end
* BIN is dirtied, but is not part of checkpoint
*/
boolean insertOk = rootIN.insertEntry(bin, key, logLsn);
assert insertOk;
logLsn = rootIN.optionalLog();
rootIN.setDirty(true); /*force re-logging, see [#13897]*/
root = makeRootChildReference(rootIN, new byte[0], logLsn);
rootIN.releaseLatch();
/* Add the new nodes to the in memory list. */
inMemoryINs.add(bin);
inMemoryINs.add(rootIN);
env.getEvictor().addBack(bin);
rootLatch.release();
rootLatchIsHeld = false;
break;
} else {
rootLatch.release();
rootLatchIsHeld = false;
/*
* There's a tree here, so search for where we should
* insert. However, note that a window exists after we
* release the root latch. We release the latch because the
* search method expects to take the latch. After the
* release and before search, the INCompressor may come in
* and delete the entire tree, so search may return with a
* null.
*/
bin = searchSplitsAllowed(key, cacheMode);
if (bin == null) {
/* The tree was deleted by the INCompressor. */
continue;
} else {
/* search() found a BIN where this key belongs. */
break;
}
}
}
} finally {
if (rootLatchIsHeld) {
rootLatch.release();
}
}
/* testing hook to insert item into log. */
assert TestHookExecute.doHookIfSet(ckptHook);
return bin;
}
/**
* Do a key based search, permitting pre-emptive splits. Returns the
* target node's parent.
*/
public BIN searchSplitsAllowed(byte[] key, CacheMode cacheMode) {
return searchSplitsAllowed(key, cacheMode, null);
}
private BIN searchSplitsAllowed(
byte[] key,
CacheMode cacheMode,
Comparator<byte[]> comparator) {
BIN insertTarget = null;
while (insertTarget == null) {
rootLatch.acquireShared();
boolean rootLatched = true;
boolean rootINLatched = false;
boolean success = false;
IN rootIN = null;
/*
* Latch the rootIN, check if it needs splitting. If so split it
* and update the associated MapLN. To update the MapLN, we must
* lock it, which implies that all latches must be released prior
* to the lock, and as a result, the root may require splitting
* again or may be split by another thread. So we must restart
* the loop to get the latest root.
*/
try {
if (!rootExists()) {
return null;
}
rootIN = (IN) root.fetchTarget(database, null);
if (rootIN.needsSplitting()) {
rootLatch.release();
rootLatch.acquireExclusive();
if (!rootExists()) {
return null;
}
rootIN = (IN) root.fetchTarget(database, null);
if (rootIN.needsSplitting()) {
splitRoot(cacheMode);
rootLatch.release();
rootLatched = false;
EnvironmentImpl env = database.getEnv();
env.getDbTree().optionalModifyDbRoot(database);
continue;
}
}
rootIN.latchShared(cacheMode);
rootINLatched = true;
success = true;
} finally {
if (!success && rootINLatched) {
rootIN.releaseLatch();
}
if (rootLatched) {
rootLatch.release();
}
}
/*
* Now, search the tree, doing splits if required. The rootIN
* is latched in SH mode, but this.root is not latched. If any
* splits are needed, this.root will first be latched exclusively
* and will stay latched until all splits are done.
*/
try {
assert(rootINLatched);
insertTarget = searchSplitsAllowed(
rootIN, key, cacheMode, comparator);
if (insertTarget == null) {
if (LatchSupport.TRACK_LATCHES) {
LatchSupport.expectBtreeLatchesHeld(0);
}
database.getEnv().incRelatchesRequired();
}
} catch (SplitRequiredException e) {
/*
* The last slot in the root was used at the point when this
* thread released the rootIN latch in order to force splits.
* Retry. SR [#11147].
*/
continue;
}
}
return insertTarget;
}
/**
* Search the tree, permitting preemptive splits.
*
* When this returns, parent will be unlatched unless parent is the
* returned IN.
*/
private BIN searchSplitsAllowed(
IN rootIN,
byte[] key,
CacheMode cacheMode,
Comparator<byte[]> comparator)
throws SplitRequiredException {
assert(rootIN.isLatchOwner());
if (!rootIN.isRoot()) {
throw EnvironmentFailureException.unexpectedState(
"A null or non-root IN was given as the parent");
}
int index;
IN parent = rootIN;
IN child = null;
boolean success = false;
/*
* Search downward until we hit a node that needs a split. In that
* case, retreat to the top of the tree and force splits downward.
*/
try {
do {
if (parent.getNEntries() == 0) {
throw EnvironmentFailureException.unexpectedState(
"Found upper IN with 0 entries");
}
index = parent.findEntry(key, false, false, comparator);
assert index >= 0;
child = parent.fetchINWithNoLatch(index, key);
if (child == null) {
return null; // restart the search
}
/* if child is a BIN, it is actually EX-latched */
latchChildShared(parent, child, cacheMode);
/*
* If we need to split, try compressing first and check again.
* Mutate to a full BIN because compression has no impact on a
* BIN-delta, and a full BIN is needed for splitting anyway.
*/
if (child.needsSplitting()) {
child.mutateToFullBIN(false /*leaveFreeSlot*/);
database.getEnv().lazyCompress(
child, true /*compressDirtySlots*/);
if (child.needsSplitting()) {
child.releaseLatch();
parent.releaseLatch();
/* SR [#11144]*/
assert TestHookExecute.doHookIfSet(waitHook);
/*
* forceSplit may throw SplitRequiredException if it
* finds that the root needs splitting. Allow the
* exception to propagate up to the caller, who will
* do the root split. Otherwise, restart the search
* from the root IN again.
*/
rootIN = forceSplit(key, cacheMode);
parent = rootIN;
assert(rootIN.isLatchOwner());
if (!rootIN.isRoot()) {
throw EnvironmentFailureException.unexpectedState(
"A null or non-root IN was given as the parent");
}
continue;
}
}
/* Continue down a level */
parent.releaseLatch();
parent = child;
child = null;
} while (!parent.isBIN());
success = true;
return (BIN)parent;
} finally {
if (!success) {
if (child != null && child.isLatchOwner()) {
child.releaseLatch();
}
if (parent != child && parent.isLatchOwner()) {
parent.releaseLatch();
}
}
}
}
/**
* Do pre-emptive splitting: search down the tree until we get to the BIN
* level, and split any nodes that fit the splittable requirement except
* for the root. If the root needs splitting, a splitRequiredException
* is thrown and the root split is handled at a higher level.
*
* Note that more than one node in the path may be splittable. For example,
* a tree might have a level2 IN and a BIN that are both splittable, and
* would be encountered by the same insert operation.
*
* Splits cause INs to be logged in all ancestors, including the root. This
* is to avoid the "great aunt" problem described in
* RecoveryManager.buildINs.
*
* INs below the root are logged provisionally; only the root is logged
* non-provisionally. Provisional logging is necessary during a checkpoint
* for levels less than maxFlushLevel.
*
* This method acquires and holds this.rootLatch in EX mode during its
* whole duration (so splits are serialized). The rootLatch is released
* on return.
*
* @return the tree root node, latched in EX mode. This may be different
* than the tree root when this method was called, because no latches are
* held on entering this method.
*
* All latches are released in case of exception.
*/
private IN forceSplit(byte[] key, CacheMode cacheMode)
throws DatabaseException, SplitRequiredException {
final ArrayList<SplitInfo> nodeLadder = new ArrayList<SplitInfo>();
boolean allLeftSideDescent = true;
boolean allRightSideDescent = true;
int index;
IN parent;
IN child = null;
IN rootIN = null;
/*
* Latch the root in order to update the root LSN when we're done.
* Latch order must be: root, root IN. We'll leave this method with the
* original parent latched.
*/
rootLatch.acquireExclusive();
boolean success = false;
try {
/* The root IN may have been evicted. [#16173] */
rootIN = (IN) root.fetchTarget(database, null);
parent = rootIN;
parent.latch(cacheMode);
/*
* Another thread may have crept in and
* - used the last free slot in the parent, making it impossible
* to correctly propagate the split.
* - actually split the root, in which case we may be looking at
* the wrong subtree for this search.
* If so, throw and retry from above. SR [#11144]
*/
if (rootIN.needsSplitting()) {
throw splitRequiredException;
}
/*
* Search downward to the BIN level, saving the information
* needed to do a split if necessary.
*/
do {
if (parent.getNEntries() == 0) {
throw EnvironmentFailureException.unexpectedState(
"Found upper IN with 0 entries");
}
/* Look for the entry matching key in the current node. */
index = parent.findEntry(key, false, false);
assert index >= 0;
if (index != 0) {
allLeftSideDescent = false;
}
if (index != (parent.getNEntries() - 1)) {
allRightSideDescent = false;
}
/*
* Get the child node that matches. We only need to work on
* nodes in residence.
*/
child = parent.loadIN(index, cacheMode);
if (child == null) {
break;
}
latchChild(parent, child, cacheMode);
nodeLadder.add(new SplitInfo(parent, child, index));
/* Continue down a level */
parent = child;
} while (!parent.isBIN());
boolean startedSplits = false;
/*
* Process the accumulated nodes from the bottom up. Split each
* node if required. If the node should not split, we check if
* there have been any splits on the ladder yet. If there are none,
* we merely release the node, since there is no update. If splits
* have started, we need to propagate new LSNs upward, so we log
* the node and update its parent.
*/
long lastParentForSplit = Node.NULL_NODE_ID;
for (int i = nodeLadder.size() - 1; i >= 0; i -= 1) {
final SplitInfo info = nodeLadder.get(i);
child = info.child;
parent = info.parent;
index = info.index;
/* Opportunistically split the node if it is full. */
if (child.needsSplitting()) {
child.mutateToFullBIN(false /*leaveFreeSlot*/);
final IN grandParent =
(i > 0) ? nodeLadder.get(i - 1).parent : null;
if (allLeftSideDescent || allRightSideDescent) {
child.splitSpecial(
parent, index, grandParent, maxTreeEntriesPerNode,
key, allLeftSideDescent);
} else {
child.split(
parent, index, grandParent, maxTreeEntriesPerNode);
}
lastParentForSplit = parent.getNodeId();
startedSplits = true;
/*
* If the DB root IN was logged, update the DB tree's child
* reference. Now the MapLN is logically dirty. Be sure to
* flush the MapLN if we ever evict the root.
*/
if (parent.isRoot()) {
root.updateLsnAfterOptionalLog(
database, parent.getLastLoggedLsn());
}
} else {
if (startedSplits) {
final long newChildLsn;
/*
* If this child was the parent of a split, it's
* already logged by the split call. We just need to
* propagate the logging upwards. If this child is just
* a link in the chain upwards, log it.
*/
if (lastParentForSplit == child.getNodeId()) {
newChildLsn = child.getLastLoggedLsn();
} else {
newChildLsn = child.optionalLogProvisional(parent);
}
parent.updateEntry(
index, newChildLsn, VLSN.NULL_VLSN_SEQUENCE,
0/*lastLoggedSize*/);
/*
* The root is never a 'child' in nodeLadder so it must
* be logged separately.
*/
if (parent.isRoot()) {
final long newRootLsn = parent.optionalLog();
root.updateLsnAfterOptionalLog(
database, newRootLsn);
}
}
}
child.releaseLatch();
child = null;
}
success = true;
} finally {
if (!success) {
if (child != null) {
child.releaseLatchIfOwner();
}
for (SplitInfo info : nodeLadder) {
info.child.releaseLatchIfOwner();
}
if (rootIN != null) {
rootIN.releaseLatchIfOwner();
}
}
rootLatch.release();
}
return rootIN;
}
/**
* Split the root of the tree.
*/
private void splitRoot(CacheMode cacheMode)
throws DatabaseException {
/*
* Create a new root IN, insert the current root IN into it, and then
* call split.
*/
EnvironmentImpl env = database.getEnv();
INList inMemoryINs = env.getInMemoryINs();
IN curRoot = null;
curRoot = (IN) root.fetchTarget(database, null);
curRoot.latch(cacheMode);
long curRootLsn = 0;
long logLsn = 0;
IN newRoot = null;
try {
/*
* Make a new root IN, giving it an id key from the previous root.
*/
byte[] rootIdKey = curRoot.getKey(0);
newRoot = new IN(database, rootIdKey, maxTreeEntriesPerNode,
curRoot.getLevel() + 1);
newRoot.latch(cacheMode);
newRoot.setIsRoot(true);
curRoot.setIsRoot(false);
/*
* Make the new root IN point to the old root IN. Log the old root
* provisionally, because we modified it so it's not the root
* anymore, then log the new root. We are guaranteed to be able to
* insert entries, since we just made this root.
*/
boolean logSuccess = false;
try {
curRootLsn = curRoot.optionalLogProvisional(newRoot);
boolean inserted = newRoot.insertEntry(
curRoot, rootIdKey, curRootLsn);
assert inserted;
logLsn = newRoot.optionalLog();
logSuccess = true;
} finally {
if (!logSuccess) {
/* Something went wrong when we tried to log. */
curRoot.setIsRoot(true);
}
}
inMemoryINs.add(newRoot);
/*
* Don't add the new root into the LRU because it has a cached
* child.
*/
/*
* Make the tree's root reference point to this new node. Now the
* MapLN is logically dirty, but the change hasn't been logged. Be
* sure to flush the MapLN if we ever evict the root.
*/
root.setTarget(newRoot);
root.updateLsnAfterOptionalLog(database, logLsn);
curRoot.split(newRoot, 0, null, maxTreeEntriesPerNode);
root.setLsn(newRoot.getLastLoggedLsn());
} finally {
/* FindBugs ignore possible null pointer dereference of newRoot. */
newRoot.releaseLatch();
curRoot.releaseLatch();
}
database.getEnv().incRootSplits();
traceSplitRoot(Level.FINE, TRACE_ROOT_SPLIT, newRoot, logLsn,
curRoot, curRootLsn);
}
public BIN search(byte[] key, CacheMode cacheMode) {
return search(key, SearchType.NORMAL, null, cacheMode, null);
}
/**
* Search the tree, starting at the root. Depending on search type either
* (a) search for the BIN that *should* contain a given key, or (b) return
* the right-most or left-most BIN in the tree.
*
* Preemptive splitting is not done during the search.
*
* @param key - the key to search for, or null if searchType is LEFT or
* RIGHT.
*
* @param searchType - The type of tree search to perform. NORMAL means
* we're searching for key in the tree. LEFT/RIGHT means we're descending
* down the left or right side, resp.
*
* @param binBoundary - If non-null, information is returned about whether
* the BIN found is the first or last BIN in the database.
*
* @return - the BIN that matches the criteria, if any. Returns null if
* the root is null. BIN is latched (unless it's null) and must be
* unlatched by the caller. In a NORMAL search, it is the caller's
* responsibility to do the findEntry() call on the key and BIN to locate
* the entry (if any) that matches key.
*/
public BIN search(
byte[] key,
SearchType searchType,
BINBoundary binBoundary,
CacheMode cacheMode,
Comparator<byte[]> comparator) {
IN rootIN = getRootIN(cacheMode);
if (rootIN == null) {
return null;
}
assert ((searchType != SearchType.LEFT &&
searchType != SearchType.RIGHT) || key == null);
if (binBoundary != null) {
binBoundary.isLastBin = true;
binBoundary.isFirstBin = true;
}
boolean success = false;
int index;
IN parent = rootIN;
IN child = null;
TreeWalkerStatsAccumulator treeStatsAccumulator =
getTreeStatsAccumulator();
try {
if (treeStatsAccumulator != null) {
parent.accumulateStats(treeStatsAccumulator);
}
do {
if (parent.getNEntries() == 0) {
throw EnvironmentFailureException.unexpectedState(
"Upper IN with 0 entries");
}
if (searchType == SearchType.NORMAL) {
index = parent.findEntry(key, false, false, comparator);
} else if (searchType == SearchType.LEFT) {
index = 0;
} else if (searchType == SearchType.RIGHT) {
index = parent.getNEntries() - 1;
} else {
throw EnvironmentFailureException.unexpectedState(
"Invalid value of searchType: " + searchType);
}
assert(index >= 0);
if (binBoundary != null) {
if (index != parent.getNEntries() - 1) {
binBoundary.isLastBin = false;
}
if (index != 0) {
binBoundary.isFirstBin = false;
}
}
child = parent.fetchINWithNoLatch(index, key);
if (child == null) {
parent = getRootIN(cacheMode);
assert(parent != null);
if (treeStatsAccumulator != null) {
parent.accumulateStats(treeStatsAccumulator);
}
continue;
}
/* Latch the child. Note: BINs are always latched exclusive. */
latchChildShared(parent, child, cacheMode);
if (treeStatsAccumulator != null) {
child.accumulateStats(treeStatsAccumulator);
}
parent.releaseLatch();
parent = child;
child = null;
} while (!parent.isBIN());
success = true;
return (BIN)parent;
} finally {
if (!success) {
/*
* In [#14903] we encountered a latch exception below and the
* original exception was lost. Print the stack trace and
* allow the original exception to be thrown if this happens
* again, to get more information about the problem.
*/
try {
if (child != null && child.isLatchOwner()) {
child.releaseLatch();
}
if (parent != child && parent.isLatchOwner()) {
parent.releaseLatch();
}
} catch (Exception e) {
LoggerUtils.traceAndLogException(
database.getEnv(), "Tree", "searchSubTreeInternal", "",
e);
}
}
}
}
/*
* Search for the given key in the subtree rooted at the given parent IN.
* The search descends until the given target level, and the IN that
* contains or covers the key is returned latched in EX or SH mode as
* specified by the latchShared param.
*
* The method uses grandparent latching, but only if the parent is the
* root of the whole Btree and it is SH-latched on entry.
*/
private IN searchSubTree(
IN parent,
byte[] key,
SearchType searchType,
int targetLevel,
boolean latchShared,
CacheMode cacheMode,
Comparator<byte[]> comparator) {
/*
* If a an intermediate IN (e.g., from getNextIN) was
* originally passed, it was latched exclusively.
*/
assert(parent != null &&
(parent.isRoot() ||
parent.isLatchExclusiveOwner()));
if ((searchType == SearchType.LEFT ||
searchType == SearchType.RIGHT) &&
key != null) {
/*
* If caller is asking for a right or left search, they shouldn't
* be passing us a key.
*/
throw EnvironmentFailureException.unexpectedState(
"searchSubTree passed key and left/right search");
}
assert(parent.isUpperIN());
assert(parent.isLatchOwner());
boolean success = false;
int index;
IN subtreeRoot = parent;
IN child = null;
IN grandParent = null;
boolean childIsLatched = false;
boolean grandParentIsLatched = false;
boolean doGrandparentLatching = !parent.isLatchExclusiveOwner();
TreeWalkerStatsAccumulator treeStatsAccumulator =
getTreeStatsAccumulator();
try {
do {
if (treeStatsAccumulator != null) {
parent.accumulateStats(treeStatsAccumulator);
}
assert(parent.getNEntries() > 0);
if (searchType == SearchType.NORMAL) {
/* Look for the entry matching key in the current node. */
index = parent.findEntry(key, false, false, comparator);
} else if (searchType == SearchType.LEFT) {
/* Left search, always take the 0th entry. */
index = 0;
} else if (searchType == SearchType.RIGHT) {
/* Right search, always take the highest entry. */
index = parent.getNEntries() - 1;
} else {
throw EnvironmentFailureException.unexpectedState(
"Invalid value of searchType: " + searchType);
}
assert(index >= 0);
/*
* Get the child IN.
*
* If the child is not cached and we are usimg grandparent
* latching, then:
*
* (a) If "parent" is not the subtree root, is is always
* SH-latched at this point. So, to fetch the child, we need to
* unlatch the parent and relatch it exclusively. Because we
* have the grandparent latch (in either SH or EX mode), the
* parent will not be evicted or detached from the tree and the
* index of the child within the parent won't change. After
* the parent is EX-latched, we can release the grandparent so
*. it won't be held while reading the child from the log.
*
* (b) If "parent" is the BTree root, it may be SH-latched. In
* this case, since there is no grandparent, we must unlatch
* the parent and relatch it in EX mode under the protection
* of the rootLatch; then we restart the do-loop.
*
* (c) If "parent" is the subtree root, but not the root of
* the full Btree, then it must be EX-latched already, and
* we can just fetch the child.
*/
child = (IN) parent.getTarget(index);
if (child == null && doGrandparentLatching) {
if (parent != subtreeRoot) {
assert(!parent.isLatchExclusiveOwner());
parent.releaseLatch();
parent.latch(cacheMode);
grandParent.releaseLatch();
grandParentIsLatched = false;
grandParent = null;
doGrandparentLatching = false;
} else if (parent.isRoot() &&
!parent.isLatchExclusiveOwner()) {
parent.releaseLatch();
subtreeRoot = getRootINLatchedExclusive(cacheMode);
parent = subtreeRoot;
assert(parent != null);
assert(grandParent == null);
doGrandparentLatching = false;
continue;
}
child = parent.fetchIN(index, cacheMode);
} else if (child == null) {
child = parent.fetchIN(index, CacheMode.UNCHANGED);
}
/* After fetching the child we can release the grandparent. */
if (grandParent != null) {
grandParent.releaseLatch();
grandParentIsLatched = false;
}
/* Latch the child. Note: BINs are always latched exclusive. */
if (child.getLevel() == targetLevel) {
if (latchShared) {
child.latchShared(cacheMode);
} else {
child.latch(cacheMode);
}
}
else if (doGrandparentLatching) {
} else {
latchChild(parent, child, cacheMode);
}
childIsLatched = true;
child.mutateToFullBIN(false /*leaveFreeSlot*/);
if (treeStatsAccumulator != null) {
child.accumulateStats(treeStatsAccumulator);
}
/* Continue down a level */
if (doGrandparentLatching) {
grandParent = parent;
grandParentIsLatched = true;
} else {
parent.releaseLatch();
}
parent = child;
} while (!parent.isBIN() && parent.getLevel() != targetLevel);
success = true;
return child;
} finally {
if (!success) {
/*
* In [#14903] we encountered a latch exception below and the
* original exception was lost. Print the stack trace and
* allow the original exception to be thrown if this happens
* again, to get more information about the problem.
*/
try {
if (child != null && childIsLatched) {
child.releaseLatch();
}
if (parent != child) {
parent.releaseLatch();
}
} catch (Exception e) {
LoggerUtils.traceAndLogException(
database.getEnv(), "Tree", "searchSubTreeInternal", "",
e);
}
}
if (grandParent != null && grandParentIsLatched) {
grandParent.releaseLatch();
}
}
}
/**
* rebuildINList is used by recovery to add all the resident nodes to the
* IN list.
*/
public void rebuildINList()
throws DatabaseException {
INList inMemoryList = database.getEnv().getInMemoryINs();
if (root != null) {
rootLatch.acquireShared();
try {
Node rootIN = root.getTarget();
if (rootIN != null) {
rootIN.rebuildINList(inMemoryList);
}
} finally {
rootLatch.release();
}
}
}
/**
* Debugging check that all resident nodes are on the INList and no stray
* nodes are present in the unused portion of the IN arrays.
*/
public void validateINList(IN parent)
throws DatabaseException {
if (parent == null) {
parent = (IN) root.getTarget();
}
if (parent != null) {
INList inList = database.getEnv().getInMemoryINs();
if (!inList.contains(parent)) {
throw EnvironmentFailureException.unexpectedState(
"IN " + parent.getNodeId() + " missing from INList");
}
for (int i = 0;; i += 1) {
try {
Node node = parent.getTarget(i);
if (i >= parent.getNEntries()) {
if (node != null) {
throw EnvironmentFailureException.unexpectedState(
"IN " + parent.getNodeId() +
" has stray node " + node +
" at index " + i);
}
byte[] key = parent.getKey(i);
if (key != null) {
throw EnvironmentFailureException.unexpectedState(
"IN " + parent.getNodeId() +
" has stray key " + key +
" at index " + i);
}
}
if (node instanceof IN) {
validateINList((IN) node);
}
} catch (ArrayIndexOutOfBoundsException e) {
break;
}
}
}
}
/*
* Logging support
*/
/**
* @see Loggable#getLogSize
*/
public int getLogSize() {
int size = 1; // rootExists
if (root != null) {
size += root.getLogSize();
}
return size;
}
/**
* @see Loggable#writeToLog
*/
public void writeToLog(ByteBuffer logBuffer) {
byte booleans = (byte) ((root != null) ? 1 : 0);
logBuffer.put(booleans);
if (root != null) {
root.writeToLog(logBuffer);
}
}
/**
* @see Loggable#readFromLog
*/
public void readFromLog(ByteBuffer itemBuffer, int entryVersion) {
boolean rootExists = false;
byte booleans = itemBuffer.get();
rootExists = (booleans & 1) != 0;
if (rootExists) {
root = makeRootChildReference();
root.readFromLog(itemBuffer, entryVersion);
}
}
/**
* @see Loggable#dumpLog
*/
public void dumpLog(StringBuilder sb, boolean verbose) {
sb.append("<root>");
if (root != null) {
root.dumpLog(sb, verbose);
}
sb.append("</root>");
}
/**
* @see Loggable#getTransactionId
*/
public long getTransactionId() {
return 0;
}
/**
* @see Loggable#logicalEquals
* Always return false, this item should never be compared.
*/
public boolean logicalEquals(Loggable other) {
return false;
}
private TreeWalkerStatsAccumulator getTreeStatsAccumulator() {
if (EnvironmentImpl.getThreadLocalReferenceCount() > 0) {
return treeStatsAccumulatorTL.get();
} else {
return null;
}
}
public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) {
treeStatsAccumulatorTL.set(tSA);
}
/*
* Debugging stuff.
*/
public void dump() {
System.out.println(dumpString(0));
}
public String dumpString(int nSpaces) {
StringBuilder sb = new StringBuilder();
sb.append(TreeUtils.indent(nSpaces));
sb.append("<tree>");
sb.append('\n');
if (root != null) {
sb.append(DbLsn.dumpString(root.getLsn(), nSpaces));
sb.append('\n');
IN rootIN = (IN) root.getTarget();
if (rootIN == null) {
sb.append("<in/>");
} else {
sb.append(rootIN.toString());
}
sb.append('\n');
}
sb.append(TreeUtils.indent(nSpaces));
sb.append("</tree>");
return sb.toString();
}
/**
* Unit test support to validate subtree pruning. Didn't want to make root
* access public.
*/
boolean validateDelete(int index)
throws DatabaseException {
rootLatch.acquireShared();
try {
IN rootIN = (IN) root.fetchTarget(database, null);
rootIN.latch();
try {
return rootIN.validateSubtreeBeforeDelete(index);
} finally {
rootIN.releaseLatch();
}
} finally {
rootLatch.release();
}
}
/* For unit testing only. */
public void setWaitHook(TestHook hook) {
waitHook = hook;
}
/* For unit testing only. */
public void setSearchHook(TestHook hook) {
searchHook = hook;
}
/* For unit testing only. */
public void setCkptHook(TestHook hook) {
ckptHook = hook;
}
/* For unit testing only. */
public void setGetParentINHook(TestHook hook) {
getParentINHook = hook;
}
public void setFetchINHook(TestHook hook) {
fetchINHook = hook;
}
/**
* Send trace messages to the java.util.logger. Don't rely on the logger
* alone to conditionalize whether we send this message, we don't even want
* to construct the message if the level is not enabled.
*/
private void traceSplitRoot(Level level,
String splitType,
IN newRoot,
long newRootLsn,
IN oldRoot,
long oldRootLsn) {
Logger logger = database.getEnv().getLogger();
if (logger.isLoggable(level)) {
StringBuilder sb = new StringBuilder();
sb.append(splitType);
sb.append(" newRoot=").append(newRoot.getNodeId());
sb.append(" newRootLsn=").
append(DbLsn.getNoFormatString(newRootLsn));
sb.append(" oldRoot=").append(oldRoot.getNodeId());
sb.append(" oldRootLsn=").
append(DbLsn.getNoFormatString(oldRootLsn));
LoggerUtils.logMsg(
logger, database.getEnv(), level, sb.toString());
}
}
private static class SplitInfo {
IN parent;
IN child;
int index;
SplitInfo(IN parent, IN child, int index) {
this.parent = parent;
this.child = child;
this.index = index;
}
}
}