src/main/java/com/sleepycat/je/dbi/SortedLSNTreeWalker.java - doris-thirdparty - Git at Google

 /*-
  * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
  *
  * This file was distributed by Oracle as part of a version of Oracle Berkeley
  * DB Java Edition made available at:
  *
  * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
  *
  * Please see the LICENSE file included in the top-level directory of the
  * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
  * license and additional information.
  */

 package com.sleepycat.je.dbi;

 import java.io.FileNotFoundException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

 import com.sleepycat.je.CacheMode;
 import com.sleepycat.je.DatabaseEntry;
 import com.sleepycat.je.DatabaseException;
 import com.sleepycat.je.EnvironmentFailureException;
 import com.sleepycat.je.evictor.OffHeapCache;
 import com.sleepycat.je.log.ErasedException;
 import com.sleepycat.je.log.LogEntryType;
 import com.sleepycat.je.log.LogManager;
 import com.sleepycat.je.log.WholeEntry;
 import com.sleepycat.je.log.entry.BINDeltaLogEntry;
 import com.sleepycat.je.log.entry.LNLogEntry;
 import com.sleepycat.je.log.entry.LogEntry;
 import com.sleepycat.je.log.entry.OldBINDeltaLogEntry;
 import com.sleepycat.je.tree.BIN;
 import com.sleepycat.je.tree.IN;
 import com.sleepycat.je.tree.LN;
 import com.sleepycat.je.tree.Node;
 import com.sleepycat.je.tree.OldBINDelta;
 import com.sleepycat.je.utilint.DbLsn;
 import com.sleepycat.je.utilint.SizeofMarker;

 /**
  * SortedLSNTreeWalker uses ordered disk access rather than random access to
  * iterate over a database tree. Faulting in data records by on-disk order can
  * provide much improved performance over faulting in by key order, since the
  * latter may require random access.  SortedLSN walking does not obey cursor
  * and locking constraints, and therefore can only be guaranteed consistent for
  * a quiescent tree which is not being modified by user or daemon threads.
  *
  * The class walks over the tree using sorted LSN fetching for parts of the
  * tree that are not in memory. It returns LSNs for each node in the tree,
  * <b>except</b> the root IN, in an arbitrary order (i.e. not key
  * order). The caller is responsible for getting the root IN's LSN explicitly.
  * <p>
  * A callback function specified in the constructor is executed for each LSN
  * found.
  * <p>
  * The walker works in two phases.  The first phase is to gather and return all
  * the resident INs using the roots that were specified when the SLTW was
  * constructed.  For each child of each root, if the child is resident it is
  * passed to the callback method (processLSN).  If the child was not in memory,
  * it is added to a list of LSNs to read.  When all of the in-memory INs have
  * been passed to the callback for all LSNs collected, phase 1 is complete.
  * <p>
  * In phase 2, for each of the sorted LSNs, the target is fetched, the type
  * determined, and the LSN and type passed to the callback method for
  * processing.  LSNs of the children of those nodes are retrieved and the
  * process repeated until there are no more nodes to be fetched for this
  * database's tree.  LSNs are accumulated in batches in this phase so that
  * memory consumption is not excessive.  For instance, if batches were not used
  * then the LSNs of all of the BINs would need to be held in memory.
  */
 public class SortedLSNTreeWalker {

     /**
      * The interface for calling back to the user with each LSN visited by
      * the walker.
      */
     public interface TreeNodeProcessor {

         /**
          * Called for each child slot that the walker visits, whether or not
          * the child node is resident.
          *
          * @param childLSN the LSN of the child.  The walker passes
          * DbLsn.NULL_LSN for an embedded LN that it chose not to accumulate.
          *
          * @param childType the log entry type of the child.  When the child
          * is not resident the walker passes LogEntryType.LOG_INS_LN, since
          * a non-resident child that is not fetched must be an LN.
          *
          * @param theNode the child node, or null if it is not resident.
          *
          * @param lnKey the key of the slot when the child is known to be an
          * LN, otherwise null.
          *
          * @param lastLoggedSize the last logged size as reported by the
          * parent slot or by the fetch; 0 is passed for embedded LNs that
          * are reported without a node.
          *
          * @param isEmbedded true if the slot holds an embedded LN.
          *
          * @throws FileNotFoundException may be thrown by the implementation;
          * the walker converts it to an EnvironmentFailureException unless
          * the ExceptionPredicate says it can be ignored.
          */
         void processLSN(long childLSN,
                         LogEntryType childType,
                         Node theNode,
                         byte[] lnKey,
                         int lastLoggedSize,
                         boolean isEmbedded)
             throws FileNotFoundException;

         /*
          * Used for processing dirty (unlogged) deferred write LNs. [#15365]
          * The walker calls this only for a resident LN in a defunct BIN
          * slot, and only when LN.isDirty() returns true.
          */
         void processDirtyDeletedLN(long childLSN, LN ln, byte[] lnKey);

         /*
          * Called when the internal memory limit is exceeded, immediately
          * before the walker processes the currently accumulated LSNs.
          */
         void noteMemoryExceeded();
     }

     /**
      * Optionally passed to the SortedLSNTreeWalker to be called when an
      * exception occurs.  The walker consults it for exceptions thrown while
      * fetching an LSN and while invoking the TreeNodeProcessor callback; a
      * null predicate means that no exception is ignored.
      */
     interface ExceptionPredicate {
         /*
          * Return true if the exception can be ignored, in which case the
          * walker neither saves nor rethrows it.
          */
         boolean ignoreException(Exception e);
     }

     /* The databases to walk; parallel to rootLsns. */
     final DatabaseImpl[] dbImpls;

     /* The environment shared by all of the databases in dbImpls. */
     protected final EnvironmentImpl envImpl;

     /*
      * Save the root LSN at construction time, because the root may be
      * nulled out before walk() executes.
      */
     private final long[] rootLsns;

     /* The limit on memory to be used for internal structures during SLTW. */
     private long internalMemoryLimit = Long.MAX_VALUE;

     /* The current memory usage by internal SLTW structures. */
     private long internalMemoryUsage;

     /* Receives each LSN found by the walk; see TreeNodeProcessor. */
     private final TreeNodeProcessor callback;

     /*
      * If true, then walker should fetch LNs and pass them to the
      * TreeNodeProcessor callback method.  Even if true, dup LNs are not
      * fetched because they are normally never used (see accumulateDupLNs).
      */
     public boolean accumulateLNs = false;

     /*
      * NOTE(review): not referenced in the visible part of this class;
      * presumably read by a preload subclass -- confirm before relying on it.
      */
     boolean preloadIntoOffHeapCache = false;

     /*
      * If true, fetch LNs in a dup DB.  Since LNs in a dup DB are not used by
      * cursor operations, fetching dup LNs should only be needed in very
      * exceptional situations.  Currently this field is never set to true.
      */
     boolean accumulateDupLNs = false;

     /*
      * If non-null, save any exceptions encountered while traversing nodes into
      * this savedExceptions list, in order to walk as much of the tree as
      * possible. The caller of the tree walker will handle the exceptions.
      * If null, such exceptions are rethrown to the caller of the walk.
      */
     private final List<DatabaseException> savedExceptions;

     /*
      * If non-null, consulted for each exception; exceptions it reports as
      * ignorable are neither saved nor rethrown.
      */
     private final ExceptionPredicate excPredicate;

     /*
      * The batch size of LSNs which will be sorted.  When the number of
      * accumulated LSNs exceeds this value the current batch is processed.
      */
     private long lsnBatchSize = Long.MAX_VALUE;

     /* Holder for returning LN key from fetchLSN. */
     private final DatabaseEntry lnKeyEntry = new DatabaseEntry();

     /*
      * The lsnINMap field (declared after the INEntry classes below) maps an
      * LSN to the IN/index pair of the slot that refers to it. When an LSN is
      * processed by the tree walker, the map is used to look up the parent IN
      * and child entry index for that LSN.  Since fetchLSN is called with an
      * arbitrary LSN, and since when we fetch (for preload) we need to set up
      * the parent to refer to the node which we are prefetching, we need to
      * have the parent in hand at the time of the call to fetchLSN.  This map
      * allows us to keep a reference to that parent so that we can call
      * fetchNode on that parent.
      *
      * It is also necessary to maintain this map for cases other than preload()
      * so that during multi-db walks (i.e. multi db preload), we can associate
      * an arbitrary LSN back to the parent IN and therefore connect a fetched
      * Node into the proper place in the tree.
      *
      * LSN -> INEntry
      */
     /**
      * Simple holder for an IN together with the index of one of its slots.
      * Instances are the values of the LSN map; subclasses may attach BIN-delta
      * information by overriding getDelta and getDeltaLsn.
      */
     public static class INEntry {

         /* The parent IN; null only for the SizeofMarker instance. */
         final IN in;

         /* The index of the slot within the parent IN. */
         final int index;

         INEntry(IN in, int index) {
             assert in != null && in.getDatabase() != null;
             this.index = index;
             this.in = in;
         }

         /* Constructor used only with a SizeofMarker argument. */
         public INEntry(@SuppressWarnings("unused") SizeofMarker marker) {
             this.index = 0;
             this.in = null;
         }

         /* A plain INEntry carries no delta. */
         Object getDelta() {
             return null;
         }

         /* A plain INEntry carries no delta LSN. */
         long getDeltaLsn() {
             return DbLsn.NULL_LSN;
         }

         /* The budgeted size of this entry plus its hash map entry. */
         long getMemorySize() {
             final long budgetedSize =
                 MemoryBudget.HASHMAP_ENTRY_OVERHEAD +
                 MemoryBudget.INENTRY_OVERHEAD;
             return budgetedSize;
         }
     }

     /**
      * An INEntry that also carries a BIN-delta and the LSN of that delta.
      * A BIN-delta encountered during the walk cannot be placed in the tree
      * immediately, so a DeltaINEntry is queued under the LSN of the full BIN.
      * When the full BIN is later fetched (in LSN order, like everything
      * else), the entry supplies the delta to apply to it.
      */
     public static class DeltaINEntry extends INEntry {

         /* Either an OldBINDelta or a BIN; see getMemorySize. */
         private final Object delta;

         /* The LSN of the delta itself, as opposed to the full BIN's LSN. */
         private final long deltaLsn;

         DeltaINEntry(IN in, int index, Object delta, long deltaLsn) {
             super(in, index);
             assert (delta != null);
             assert (deltaLsn != DbLsn.NULL_LSN);
             this.deltaLsn = deltaLsn;
             this.delta = delta;
         }

         /* Constructor used only with a SizeofMarker argument. */
         public DeltaINEntry(@SuppressWarnings("unused") SizeofMarker marker) {
             super(marker);
             this.deltaLsn = 0;
             this.delta = null;
         }

         @Override
         Object getDelta() {
             return delta;
         }

         @Override
         long getDeltaLsn() {
             return deltaLsn;
         }

         /*
          * The size of the delta plus the budgeted overhead of this entry
          * and its hash map entry.
          */
         @Override
         long getMemorySize() {
             final long deltaBytes = (delta instanceof OldBINDelta) ?
                 ((OldBINDelta) delta).getMemorySize() :
                 ((BIN) delta).getInMemorySize();

             return deltaBytes +
                 MemoryBudget.HASHMAP_ENTRY_OVERHEAD +
                 MemoryBudget.DELTAINENTRY_OVERHEAD;
         }
     }

     /*
      * LSN -> INEntry.  Holds, for each LSN waiting to be fetched, the parent
      * IN and slot index (and any BIN-delta) needed when the LSN is fetched.
      */
     private final Map<Long, INEntry> lsnINMap = new HashMap<>();

     /*
      * @param dbImpls an array of DatabaseImpls which should be walked over
      * in disk order.  This array must be parallel to the rootLsns array in
      * that rootLsns[i] must be the root LSN for dbImpls[i].
      *
      * @param rootLsns is passed in addition to the dbImpls, because the
      * root may be nulled out on the dbImpl before walk() is called.
      *
      * @param callback the callback instance
      *
      * @param savedExceptions a List of DatabaseExceptions encountered during
      * the tree walk.
      *
      * @param excPredicate a predicate to determine whether a given exception
      * should be ignored.
      */
     public SortedLSNTreeWalker(DatabaseImpl[] dbImpls,
                                long[] rootLsns,
                                TreeNodeProcessor callback,
                                List<DatabaseException> savedExceptions,
                                ExceptionPredicate excPredicate) {

         if (dbImpls == null || dbImpls.length < 1) {
             throw EnvironmentFailureException.unexpectedState
                 ("DatabaseImpls array is null or 0-length for " +
                  "SortedLSNTreeWalker");
         }

         this.dbImpls = dbImpls;
         this.envImpl = dbImpls[0].getEnv();
         /* Make sure all databases are from the same environment. */
         for (DatabaseImpl di : dbImpls) {
             EnvironmentImpl ei = di.getEnv();
             if (ei == null) {
                 throw EnvironmentFailureException.unexpectedState
                     ("environmentImpl is null for target db " +
                      di.getName());
             }

             if (ei != this.envImpl) {
                 throw new IllegalArgumentException
                     ("Environment.preload() must be called with Databases " +
                      "which are all in the same Environment. (" +
                      di.getName() + ")");
             }
         }

         this.rootLsns = rootLsns;
         this.callback = callback;
         this.savedExceptions = savedExceptions;
         this.excPredicate = excPredicate;
     }

     /*
      * Sets the batch size.  While accumulating, the walker processes the
      * pending LSNs whenever their total count exceeds this value.
      */
     void setLSNBatchSize(long lsnBatchSize) {
         this.lsnBatchSize = lsnBatchSize;
     }

     /**
      * Sets the limit on memory used by the walker's internal structures.
      * When the tracked usage exceeds this limit while accumulating, the
      * callback's noteMemoryExceeded method is invoked and the pending LSNs
      * are processed.
      */
     public void setInternalMemoryLimit(long internalMemoryLimit) {
         this.internalMemoryLimit = internalMemoryLimit;
     }

     /*
      * Adjusts the tracked internal memory usage by the given amount, which
      * is negative when memory is being released.
      */
     private void incInternalMemoryUsage(long increment) {
         internalMemoryUsage = internalMemoryUsage + increment;
     }

     /*
      * Creates an accumulator whose memory usage notifications are added to
      * this walker's internal memory usage.
      */
     private LSNAccumulator createLSNAccumulator() {
         final LSNAccumulator accumulator = new LSNAccumulator() {
             @Override
             void noteMemUsage(long increment) {
                 /* Charge the accumulator's memory to the walker. */
                 incInternalMemoryUsage(increment);
             }
         };
         return accumulator;
     }

     /**
      * Find all non-resident nodes, and execute the callback.  The root IN's
      * LSN is not returned to the callback.
      * <p>
      * This is the public entry point; the work is done by walkInternal.
      */
     public void walk() {
         walkInternal();
     }

     void walkInternal() {

         /*
          * Phase 1: visit the root of every database.  Resident descendants
          * are handed to the callback right away; the LSNs of children that
          * are not resident are collected in the accumulator so that they
          * can be sorted and fetched in phase 2.
          */
         final LSNAccumulator firstBatch = createLSNAccumulator();
         int dbIndex = 0;
         for (final DatabaseImpl db : dbImpls) {
             /* rootLsns is parallel to dbImpls. */
             processRootLSN(db, firstBatch, rootLsns[dbIndex]);
             dbIndex++;
         }

         /*
          * Phase 2: sort the collected LSNs and fetch each one.  Fetched
          * nodes are treated as in phase 1, which may collect further LSNs;
          * this repeats until nothing is left to fetch.
          */
         processAccumulatedLSNs(firstBatch);
     }

     /*
      * Obtain the (latched) root IN of the given DatabaseImpl, process its
      * children, and release the root.  Nothing is done when no root can be
      * obtained.
      */
     private void processRootLSN(DatabaseImpl dbImpl,
                                 LSNAccumulator pendingLSNs,
                                 long rootLsn) {
         final IN rootIN = getOrFetchRootIN(dbImpl, rootLsn);
         if (rootIN == null) {
             return;
         }

         try {
             accumulateLSNs(rootIN, pendingLSNs, null, -1);
         } finally {
             /* The root was returned latched; always release it. */
             releaseRootIN(rootIN);
         }
     }

     /*
      * Traverse the in-memory tree rooted at "parent". For each visited node N
      * call the callback method on N and put in pendingLSNs the LSNs of N's
      * non-resident children.
      *
      * On entering this method, parent is latched and remains latched on exit.
      *
      * @param parent the latched IN whose slots are scanned.
      *
      * @param pendingLSNs receives the LSNs of children that must be fetched
      * later.  It is processed and cleared from within this method whenever
      * the batch size or the internal memory limit is exceeded.
      *
      * @param ohBinParent if non-null, the IN that is registered in the LSN
      * map (together with ohBinIndex) in place of parent and the slot index.
      * Callers pass a non-null value when parent is a BIN materialized from
      * the off-heap cache.
      *
      * @param ohBinIndex the slot index that goes with ohBinParent, or -1.
      */
     private void accumulateLSNs(final IN parent,
                                 final LSNAccumulator pendingLSNs,
                                 final IN ohBinParent,
                                 final int ohBinIndex) {
         envImpl.checkOpen();

         final DatabaseImpl db = parent.getDatabase();
         final boolean dups = db.getSortedDuplicates();

         /*
          * Without dups, all BINs contain only LN children.  With dups, it
          * depends on the dup format.  Preload works with the old dup format
          * and the new.
          *
          * In the new dup format (or after dup conversion), BINs contain only
          * LNs and no DBINs exist.  In the old dup format, DBINs contain only
          * LN children, but BINs may contain a mix of LNs and DINs.
          */
         final boolean allChildrenAreLNs;
         if (!dups || db.getDupsConverted()) {
             allChildrenAreLNs = parent.isBIN();
         } else {
             allChildrenAreLNs = parent.isBIN() && parent.containsDuplicates();
         }

         /*
          * If LNs are not needed, there is no need to accumulate the child LSNs
          * when all children are LNs.
          */
         final boolean accumulateChildren =
             !allChildrenAreLNs || (dups ? accumulateDupLNs : accumulateLNs);

         /* Non-null only when parent is a BIN. */
         final BIN parentBin = parent.isBIN() ? ((BIN) parent) : null;
         final OffHeapCache ohCache = envImpl.getOffHeapCache();

         /*
          * Process all children, but only accumulate LSNs for children that are
          * not in memory.
          */
         for (int i = 0; i < parent.getNEntries(); i += 1) {

             final long lsn = parent.getLsn(i);
             Node child = parent.getTarget(i);
             final boolean childCached = child != null;
             final boolean isEmbedded = parent.isEmbeddedLN(i);

             /* The slot key is only reported when the child is an LN. */
             final byte[] lnKey =
                 (allChildrenAreLNs || (childCached && child.isLN())) ?
                 parent.getKey(i) : null;

             if (parentBin != null && parentBin.isDefunct(i)) {

                 /* Dirty LNs (deferred write) get special treatment. */
                 processDirtyLN(child, lsn, lnKey);
                 /* continue; */

             } else if (!childCached &&
                 parentBin != null &&
                 parentBin.getOffHeapLNId(i) != 0) {

                 /*
                  * The LN is not attached to the slot but is held in the
                  * off-heap cache; load it and report it like a cached child.
                  */

                 /* Embedded LNs are not stored off-heap */
                 assert !isEmbedded;

                 child = ohCache.loadLN(parentBin, i, CacheMode.UNCHANGED);
                 assert child != null;

                 processChild(
                     lsn, child, lnKey, parent.getLastLoggedSize(i),
                     false /*isEmbedded*/, pendingLSNs, null, -1);

             } else if (!childCached && parent.getOffHeapBINId(i) >= 0) {

                 /*
                  * The child BIN is held in the off-heap cache; materialize
                  * it so that its slots can be examined.
                  */

                 /* Embedded LNs are not stored off-heap */
                 assert !isEmbedded;

                 child = ohCache.materializeBIN(
                     envImpl, ohCache.getBINBytes(parent, i));

                 final BIN bin = (BIN) child;
                 bin.latchNoUpdateLRU(db);
                 boolean isLatched = true;

                 try {
                     if (bin.isBINDelta()) {

                         /* Deltas not allowed with deferred-write. */
                         assert (lsn != DbLsn.NULL_LSN);

                         /*
                          * Storing an off-heap reference would use less memory,
                          * but we prefer to optimize in the future by
                          * re-implementing preload.
                          */
                         final long fullLsn = bin.getLastFullLsn();
                         assert fullLsn != DbLsn.NULL_LSN;

                         /*
                          * Queue the full BIN for fetching, remembering the
                          * delta and its LSN in the LSN map entry.
                          */
                         pendingLSNs.add(fullLsn);
                         addToLsnINMap(fullLsn, parent, i, bin, lsn);

                     } else {

                         /*
                          * Release before processChild, which latches the
                          * BIN itself before descending into it.
                          */
                         bin.releaseLatch();
                         isLatched = false;

                         /*
                          * Pass parent/i so that LSNs found within the
                          * materialized BIN are registered against the slot
                          * of the in-tree parent.
                          */
                         processChild(
                             lsn, bin, lnKey, parent.getLastLoggedSize(i),
                             false /*isEmbedded*/, pendingLSNs, parent, i);
                     }
                 } finally {
                     if (isLatched) {
                         bin.releaseLatch();
                     }
                 }

             } else if (accumulateChildren &&
                        !childCached &&
                        lsn != DbLsn.NULL_LSN) {

                 /*
                  * Child is not in cache. Put its LSN in the current batch of
                  * LSNs to be sorted and fetched in phase 2. But don't do
                  * this if the child is an embedded LN.
                  */
                 if (!isEmbedded) {
                     pendingLSNs.add(lsn);
                     if (ohBinParent != null) {
                         addToLsnINMap(lsn, ohBinParent, ohBinIndex);
                     } else {
                         addToLsnINMap(lsn, parent, i);
                     }
                 } else {
                     /* Report the embedded LN without an LSN or a node. */
                     processChild(
                         DbLsn.NULL_LSN, null /*child*/, lnKey,
                         0 /*lastLoggedSize*/, true /*isEmbedded*/,
                         pendingLSNs, null, -1);
                 }

             } else if (childCached) {

                 child.latchShared();
                 boolean isLatched = true;

                 try {
                     if (child.isBINDelta()) {

                         /* Deltas not allowed with deferred-write. */
                         assert (lsn != DbLsn.NULL_LSN);

                         /*
                          * Queue the full BIN for fetching, remembering the
                          * resident delta and its LSN in the LSN map entry.
                          */
                         final BIN delta = (BIN) child;
                         final long fullLsn = delta.getLastFullLsn();
                         pendingLSNs.add(fullLsn);
                         addToLsnINMap(fullLsn, parent, i, delta, lsn);

                     } else {

                         /*
                          * Release before processChild, which latches the
                          * child itself if it is an IN.
                          */
                         child.releaseLatch();
                         isLatched = false;

                         processChild(
                             lsn, child, lnKey, parent.getLastLoggedSize(i),
                             isEmbedded, pendingLSNs, null, -1);
                     }
                 } finally {
                     if (isLatched) {
                         child.releaseLatch();
                     }
                 }

             } else {
                 /*
                  * We are here because the child was not cached and was not
                  * accumulated either (because it was an LN and LN accumulation
                  * is turned off or its LSN was NULL).
                  */
                 processChild(
                     lsn, null /*child*/, lnKey, parent.getLastLoggedSize(i),
                     isEmbedded, pendingLSNs, null, -1);
             }

             /*
              * If we've exceeded the batch size then process the current
              * batch and start a new one.
              */
             final boolean internalMemoryExceeded =
                 internalMemoryUsage > internalMemoryLimit;

             if (pendingLSNs.getNTotalEntries() > lsnBatchSize ||
                 internalMemoryExceeded) {
                 if (internalMemoryExceeded) {
                     callback.noteMemoryExceeded();
                 }
                 processAccumulatedLSNs(pendingLSNs);
                 pendingLSNs.clear();
             }
         }
     }

     /*
      * Reports a resident, dirty LN to the callback's processDirtyDeletedLN
      * method.  Does nothing if the node is absent, is not an LN, or is not
      * dirty.
      */
     private void processDirtyLN(Node node, long lsn, byte[] lnKey) {
         if (node == null || !node.isLN()) {
             return;
         }

         final LN candidate = (LN) node;
         if (!candidate.isDirty()) {
             return;
         }

         callback.processDirtyDeletedLN(lsn, candidate, lnKey);
     }

     /*
      * Reports one child slot to the callback and, if the child is a resident
      * IN, latches it and processes its children in turn.
      *
      * @param lsn the LSN passed to the callback for this child.
      *
      * @param child the resident child node, or null if the child is not
      * resident (in which case it is reported as an LN).
      *
      * @param lnKey the slot key passed to the callback, may be null.
      *
      * @param lastLoggedSize the last logged size passed to the callback.
      *
      * @param isEmbedded whether the child is an embedded LN.
      *
      * @param pendingLSNs the accumulator used when descending into an IN.
      *
      * @param ohBinParent passed through to accumulateLSNs when descending;
      * null unless the child is a BIN materialized from the off-heap cache.
      *
      * @param ohBinIndex the slot index that goes with ohBinParent, or -1.
      */
     private void processChild(
         final long lsn,
         final Node child,
         final byte[] lnKey,
         final int lastLoggedSize,
         final boolean isEmbedded,
         final LSNAccumulator pendingLSNs,
         final IN ohBinParent,
         final int ohBinIndex) {

         final boolean childCached = (child != null);

         /*
          * If the child is resident, use its log type, else it must be an LN.
          */
         callProcessLSNHandleExceptions(
             lsn,
             (!childCached ?
              LogEntryType.LOG_INS_LN /* Any LN type will do */ :
              child.getGenericLogType()),
             child, lnKey, lastLoggedSize, isEmbedded);

         if (childCached && child.isIN()) {
             final IN nodeAsIN = (IN) child;

             /*
              * Acquire the latch before entering the try block, so that the
              * finally clause only releases a latch that was actually taken.
              * If latch() itself fails, its exception propagates unmasked.
              */
             nodeAsIN.latch(CacheMode.UNCHANGED);
             try {
                 accumulateLSNs(nodeAsIN, pendingLSNs, ohBinParent, ohBinIndex);
             } finally {
                 nodeAsIN.releaseLatch();
             }
         }
     }

     /*
      * Process a batch of LSNs by sorting and fetching each of them.  Fetching
      * may discover further LSNs; these are collected in a fresh accumulator
      * which becomes the next batch, until a batch turns out to be empty.
      *
      * The accumulator passed in is only read here; emptying it is left to
      * the caller.
      */
     private void processAccumulatedLSNs(LSNAccumulator pendingLSNs) {

         LSNAccumulator currentBatch = pendingLSNs;

         while (!currentBatch.isEmpty()) {
             final long[] sortedLsns = currentBatch.getAndSortPendingLSNs();
             final LSNAccumulator nextBatch = createLSNAccumulator();

             for (int i = 0; i < sortedLsns.length; i++) {
                 fetchAndProcessLSN(sortedLsns[i], nextBatch);
             }

             currentBatch = nextBatch;
         }
     }

     /*
      * Fetch the node at 'lsn' and hand it to the callback.  When the node is
      * an IN, it is latched for the duration of the call and its children are
      * processed as well, adding to pendingLSNs.  Nothing is done when the
      * fetch yields no result.
      */
     private void fetchAndProcessLSN(long lsn, LSNAccumulator pendingLSNs) {

         /* Reset the holder in which the fetch returns the LN key. */
         lnKeyEntry.setData(null);

         final FetchResult fetched =
             fetchLSNHandleExceptions(lsn, lnKeyEntry, pendingLSNs);
         if (fetched == null) {
             return;
         }

         final Node fetchedNode = fetched.node;

         /* Non-null only when the fetched node is an IN. */
         final IN fetchedIN = fetchedNode.isIN() ? (IN) fetchedNode : null;
         if (fetchedIN != null) {
             fetchedIN.latch(CacheMode.UNCHANGED);
         }

         try {
             callProcessLSNHandleExceptions(
                 lsn, fetchedNode.getGenericLogType(), fetchedNode,
                 lnKeyEntry.getData(), fetched.lastLoggedSize,
                 false /*isEmbedded*/);

             if (fetchedIN != null) {
                 accumulateLSNs(
                     fetchedIN, pendingLSNs,
                     fetched.ohBinParent, fetched.ohBinIndex);
             }
         } finally {
             if (fetchedIN != null) {
                 fetchedIN.releaseLatch();
             }
         }
     }

     /*
      * Calls fetchLSN and applies the walker's exception policy to any
      * DatabaseException it throws: an exception accepted by the exception
      * predicate is dropped; otherwise it is added to savedExceptions when
      * that list is present, and rethrown when it is not.
      *
      * @return the fetch result, or null if fetchLSN returned null or an
      * exception was dropped or saved.
      */
     private FetchResult fetchLSNHandleExceptions(
         long lsn,
         DatabaseEntry lnKeyEntry,
         LSNAccumulator pendingLSNs) {

         try {
             return fetchLSN(lsn, lnKeyEntry, pendingLSNs);

         } catch (DatabaseException e) {

             final boolean ignorable =
                 (excPredicate != null) && excPredicate.ignoreException(e);

             if (!ignorable) {
                 if (savedExceptions == null) {
                     throw e;
                 }

                 /*
                  * This LSN fetch hit a failure. Do as much of the rest of
                  * the tree as possible.
                  */
                 savedExceptions.add(e);
             }

             return null;
         }
     }

     /*
      * Invokes callback.processLSN and applies the walker's exception policy.
      * A FileNotFoundException is wrapped in an EnvironmentFailureException
      * with reason LOG_FILE_NOT_FOUND; a DatabaseException is used as is.  An
      * exception accepted by the exception predicate is dropped; otherwise it
      * is added to savedExceptions when that list is present, and thrown when
      * it is not.
      */
     private void callProcessLSNHandleExceptions(long childLSN,
                                                 LogEntryType childType,
                                                 Node theNode,
                                                 byte[] lnKey,
                                                 int lastLoggedSize,
                                                 boolean isEmbedded) {
         DatabaseException failure = null;

         try {
             callback.processLSN(
                 childLSN, childType, theNode, lnKey, lastLoggedSize,
                 isEmbedded);

         } catch (FileNotFoundException fnfe) {
             final boolean ignorable =
                 (excPredicate != null) && excPredicate.ignoreException(fnfe);
             if (!ignorable) {
                 failure = new EnvironmentFailureException(
                     envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
                     fnfe);
             }

         } catch (DatabaseException de) {
             final boolean ignorable =
                 (excPredicate != null) && excPredicate.ignoreException(de);
             if (!ignorable) {
                 failure = de;
             }
         }

         if (failure == null) {
             return;
         }

         if (savedExceptions == null) {
             throw failure;
         }

         /*
          * The callback hit a failure. Do as much of the rest of the tree
          * as possible.
          */
         savedExceptions.add(failure);
     }

     /**
      * Returns the root IN as supplied by getResidentRootIN or, failing that,
      * by getRootIN; both are documented to return it latched.  Subclasses
      * may override getResidentRootIN and/or getRootIN to modify behavior.
      * getResidentRootIN is called first; getRootIN is only called when there
      * is no resident root and rootLsn is not NULL_LSN.
      *
      * @return the root IN, or null if none is resident and none can be
      * fetched.
      */
     private IN getOrFetchRootIN(DatabaseImpl dbImpl, long rootLsn) {
         final IN residentRoot = getResidentRootIN(dbImpl);
         if (residentRoot != null) {
             return residentRoot;
         }

         return (rootLsn == DbLsn.NULL_LSN) ?
             null :
             getRootIN(dbImpl, rootLsn);
     }

     /**
      * The default behavior fetches the rootIN from the log, associates it
      * with the given database and latches it shared.  Classes extending this
      * may fetch (and latch) the root from the tree.
      *
      * @return the latched root IN, or null if the log manager returns no
      * entry for rootLsn.
      */
     IN getRootIN(DatabaseImpl dbImpl, long rootLsn) {
         final LogManager logManager = envImpl.getLogManager();
         final IN fetchedRoot =
             (IN) logManager.getEntryHandleNotFound(rootLsn);

         if (fetchedRoot != null) {
             fetchedRoot.setDatabase(dbImpl);
             fetchedRoot.latchShared(CacheMode.DEFAULT);
         }
         return fetchedRoot;
     }

     /**
      * The default behavior asks the database's tree for its resident root
      * IN, requesting that it be latched, and returns whatever the tree
      * returns (null when no root is resident).  Classes extending this may
      * return (and latch) a known IN object.
      */
     public IN getResidentRootIN(DatabaseImpl dbImpl) {
         final IN residentRoot =
             dbImpl.getTree().getResidentRootIN(true /*latched*/);
         return residentRoot;
     }

     /**
      * Release the latch on a root IN that was returned by getOrFetchRootIN.
      * This method is private and therefore cannot be overridden; subclasses
      * customize root handling through getResidentRootIN and getRootIN.
      */
     private void releaseRootIN(IN root) {
         root.releaseLatch();
     }

     /**
      * Registers the given IN/index pair in the LSN map under the given LSN.
      */
     private void addToLsnINMap(long lsn, IN in, int index) {
         final INEntry plainEntry = new INEntry(in, index);
         addEntryToLsnMap(lsn, plainEntry);
     }

     /**
      * Registers the given IN/index pair in the LSN map under the given LSN,
      * together with a delta and the LSN of that delta.
      */
     private void addToLsnINMap(long lsn,
                                IN in,
                                int index,
                                Object delta,
                                long deltaLsn) {
         final INEntry entryWithDelta =
             new DeltaINEntry(in, index, delta, deltaLsn);
         addEntryToLsnMap(lsn, entryWithDelta);
     }

     /*
      * Puts the entry in the LSN map and keeps the internal memory usage in
      * step with the map contents.
      *
      * If the LSN was already mapped, the previous entry is replaced and only
      * the difference in size is charged.  Removal of an entry subtracts the
      * size of whichever entry is in the map at that time, so charging
      * nothing on replacement would make the usage drift whenever the
      * replaced and the replacing entries differ in size (e.g. a plain
      * INEntry replaced by a DeltaINEntry).
      */
     private void addEntryToLsnMap(long lsn, INEntry inEntry) {
         final INEntry replaced = lsnINMap.put(lsn, inEntry);

         long sizeChange = inEntry.getMemorySize();
         if (replaced != null) {
             sizeChange -= replaced.getMemorySize();
         }

         incInternalMemoryUsage(sizeChange);
     }

     /*
      * Immutable holder for what a fetch returns: the node itself, its last
      * logged size, and the IN/index pair that is handed to accumulateLSNs as
      * ohBinParent/ohBinIndex when the node's children are processed.
      */
     private static class FetchResult {

         /* The fetched (or already resident) node. */
         final Node node;

         /* The last logged size reported for the node. */
         final int lastLoggedSize;

         /* Passed on to accumulateLSNs; may be null. */
         final IN ohBinParent;

         /* The slot index that goes with ohBinParent, or -1. */
         final int ohBinIndex;

         FetchResult(Node fetchedNode,
                     int loggedSize,
                     IN parentOfOhBin,
                     int indexOfOhBin) {
             node = fetchedNode;
             lastLoggedSize = loggedSize;
             ohBinParent = parentOfOhBin;
             ohBinIndex = indexOfOhBin;
         }
     }

     /*
      * Process an LSN.  Get & remove its INEntry from the map, then fetch the
      * target at the INEntry's IN/index pair.  This method will be called in
      * sorted LSN order.
      */
     private FetchResult fetchLSN(
         long lsn,
         DatabaseEntry lnKeyEntry,
         LSNAccumulator pendingLSNs) {

         /*
          * Overview: returns a FetchResult for the node logged at the given
          * LSN, or null when there is nothing to process for this LSN (the
          * slot changed concurrently, a BIN-delta was queued for a later
          * merge with its full BIN, or an off-heap store failed). For an LN,
          * lnKeyEntry is filled in with the LN's key. When a BIN-delta is
          * encountered, the LSN of its full BIN is added to pendingLSNs.
          */
         final LogManager logManager = envImpl.getLogManager();
         final OffHeapCache ohCache = envImpl.getOffHeapCache();

         final INEntry inEntry = lsnINMap.remove(lsn);
         assert (inEntry != null) : DbLsn.getNoFormatString(lsn);

         /* The entry has left the map, so give back its memory. */
         incInternalMemoryUsage(- inEntry.getMemorySize());

         IN in = inEntry.in;
         int index = inEntry.index;

         IN ohBinParent = null;
         int ohBinIndex = -1;

         /* Only latches acquired by this method are released in finally. */
         IN in1ToUnlatch = null;
         IN in2ToUnlatch = null;

         if (!in.isLatchExclusiveOwner()) {
             in.latch();
             in1ToUnlatch = in;
         }

         final DatabaseImpl dbImpl = in.getDatabase();
         byte[] lnKey = null;

         Node residentNode = in.getTarget(index);
         if (residentNode != null) {
             residentNode.latch();
         }

         try {
             /*
              * When the indexed slot contains an off-heap BIN, the node to
              * fetch is an LN within the off-heap BIN or the full BIN to merge
              * with an off-heap BIN-delta.
              */
             Object deltaObject = inEntry.getDelta();
             boolean isOffHeapBinInTree = in.getOffHeapBINId(index) >= 0;
             boolean isLnInOffHeapBin = false;

             if (isOffHeapBinInTree && deltaObject == null) {
                 /*
                  * When fetching an LN within an off-heap BIN, materialize the
                  * parent BIN and set in/index to this true parent.
                  */
                 isLnInOffHeapBin = true;

                 final BIN ohBin = ohCache.materializeBIN(
                     envImpl, ohCache.getBINBytes(in, index));

                 int foundIndex = -1;
                 for (int i = 0; i < ohBin.getNEntries(); i += 1) {
                     if (ohBin.getLsn(i) == lsn) {
                         foundIndex = i;
                         break;
                     }
                 }

                 if (foundIndex == -1) {
                     return null; // See note on concurrent activity below.
                 }

                 ohBinParent = in;
                 ohBinIndex = index;

                 in = ohBin;
                 index = foundIndex;

                 in.latchNoUpdateLRU(dbImpl);
                 in2ToUnlatch = in;
             }

             /*
              * Concurrent activity (e.g., log cleaning) that was active before
              * we took the root latch may have changed the state of a slot.
              * Repeat check for LN deletion/expiration and check that the LSN
              * has not changed.
              */
             if (in.isBIN() && ((BIN) in).isDefunct(index)) {
                 return null;
             }

             if (deltaObject == null) {
                 if (in.getLsn(index) != lsn) {
                     return null;
                 }
             } else {
                 /* With a queued delta, the slot holds the delta's LSN. */
                 if (in.getLsn(index) != inEntry.getDeltaLsn()) {
                     return null;
                 }
             }

             boolean mutateResidentDeltaToFullBIN = false;

             if (residentNode != null) {
                 /*
                  * If the resident node is not a delta then concurrent
                  * activity (e.g., log cleaning) must have loaded the node.
                  * Just return it and continue.
                  */
                 if (!residentNode.isBINDelta()) {
                     if (residentNode.isLN()) {
                         lnKeyEntry.setData(in.getKey(index));
                     }
                     return new FetchResult(
                         residentNode, in.getLastLoggedSize(index), null, -1);
                 }

                 /* The resident node is a delta. */
                 if (((BIN) residentNode).getLastFullLsn() != lsn) {
                     return null; // See note on concurrent activity above.
                 }
                 mutateResidentDeltaToFullBIN = true;
             }

             /* Fetch log entry. */
             final WholeEntry wholeEntry;
             try {
                 wholeEntry = logManager.getWholeLogEntry(lsn);

             } catch (FileNotFoundException|ErasedException e) {
                 final String msg =
                     (fetchAndInsertIntoTree() ?
                         "Preload failed" :
                         "SortedLSNTreeWalker failed") +
                     " dbId=" + dbImpl.getId() +
                     " isOffHeapBinInTree=" + isOffHeapBinInTree +
                     " isLnInOffHeapBin=" + isLnInOffHeapBin +
                     " deltaObject=" + (deltaObject != null) +
                     " residentNode=" + (residentNode != null);

                 throw new EnvironmentFailureException(
                     envImpl,
                     (e instanceof FileNotFoundException) ?
                         EnvironmentFailureReason.LOG_FILE_NOT_FOUND :
                         EnvironmentFailureReason.LOG_CHECKSUM,
                     in.makeFetchErrorMsg(msg, lsn, index), e);
             }

             final LogEntry entry = wholeEntry.getEntry();
             final int lastLoggedSize = wholeEntry.getHeader().getEntrySize();

             /*
              * For a BIN delta, queue fetching of the full BIN and combine the
              * full BIN with the delta when it is processed later (see below).
              *
              * Note that for preload, this means that a BIN-delta is not placed
              * in the tree when there is not enough memory for the full BIN.
              * Ideally we should place the BIN-delta in the tree here.
              */
             if (entry instanceof BINDeltaLogEntry) {
                 final BINDeltaLogEntry deltaEntry = (BINDeltaLogEntry) entry;
                 final long fullLsn = deltaEntry.getPrevFullLsn();
                 final BIN delta = deltaEntry.getMainItem();
                 pendingLSNs.add(fullLsn);
                 addToLsnINMap(fullLsn, in, index, delta, lsn);
                 return null;
             }

             if (entry instanceof OldBINDeltaLogEntry) {
                 final OldBINDelta delta = (OldBINDelta) entry.getMainItem();
                 final long fullLsn = delta.getLastFullLsn();
                 pendingLSNs.add(fullLsn);
                 addToLsnINMap(fullLsn, in, index, delta, lsn);
                 return null;
             }

             /* For an LNLogEntry, call postFetchInit and get the lnKey. */
             if (entry instanceof LNLogEntry) {
                 final LNLogEntry<?> lnEntry = (LNLogEntry<?>) entry;
                 lnEntry.postFetchInit(dbImpl);
                 lnKey = lnEntry.getKey();
                 lnKeyEntry.setData(lnKey);
             }

             /* Get the Node from the LogEntry. */
             final Node ret = (Node) entry.getResolvedItem(dbImpl);

             /*
              * For an IN Node, set the database so it will be passed down to
              * nested fetches.
              */
             long lastLoggedLsn = lsn;
             if (ret.isIN()) {
                 final IN retIn = (IN) ret;
                 retIn.setDatabase(dbImpl);
             }

             /*
              * If there is a delta, then this is the full BIN to which the
              * delta must be applied. The delta LSN is the last logged LSN.
              */
             if (mutateResidentDeltaToFullBIN) {
                 final BIN fullBIN = (BIN) ret;
                 BIN delta = (BIN) residentNode;
                 if (fetchAndInsertIntoTree()) {
                     delta.mutateToFullBIN(fullBIN, false /*leaveFreeSlot*/);

                     return new FetchResult(
                         residentNode, lastLoggedSize, ohBinParent, ohBinIndex);
                 } else {
                     delta.reconstituteBIN(
                         dbImpl, fullBIN, false /*leaveFreeSlot*/);

                     return new FetchResult(
                         ret, lastLoggedSize, ohBinParent, ohBinIndex);
                 }
             }

             if (deltaObject != null) {
                 final BIN fullBIN = (BIN) ret;

                 if (deltaObject instanceof OldBINDelta) {
                     final OldBINDelta delta = (OldBINDelta) deltaObject;
                     assert lsn == delta.getLastFullLsn();
                     delta.reconstituteBIN(dbImpl, fullBIN);
                 } else {
                     final BIN delta = (BIN) deltaObject;
                     assert lsn == delta.getLastFullLsn();

                     delta.reconstituteBIN(
                         dbImpl, fullBIN, false /*leaveFreeSlot*/);
                 }

                 /* Either kind of delta is the most recently logged version. */
                 lastLoggedLsn = inEntry.getDeltaLsn();
             }

             assert !ret.isBINDelta(false);

             /*
              * When we store an off-heap BIN here, the caller must pass its
              * parent/index to accumulateLSNs.
              */
             IN retOhBinParent = null;
             int retOhBinIndex = -1;

             /* During a preload, finally place the Node into the Tree. */
             if (fetchAndInsertIntoTree()) {

                 /* Last logged size is not present before log version 9. */
                 in.setLastLoggedSize(index, lastLoggedSize);

                 /*
                  * We don't worry about the memory usage being kept below the
                  * max by the evictor, since we keep the root INs latched.
                  */
                 final MemoryBudget memBudget = envImpl.getMemoryBudget();
                 final boolean storeOffHeap =
                     preloadIntoOffHeapCache &&
                     memBudget.getCacheMemoryUsage() > memBudget.getMaxMemory();

                 /*
                  * Note that UINs are always stored in the main cache even if
                  * it is full. The idea is that LNs and BINs should be evicted
                  * from main to make room. When the main cache fills with UINs,
                  * and an off-heap cache is also being filled, we currently
                  * allow the main cache to overflow.
                  */
                 if (isOffHeapBinInTree || (storeOffHeap && !ret.isUpperIN())) {
                     if (ret.isLN()) {
                         /*
                          * Store LN off-heap. If an oh LN was added to an oh
                          * BIN we must re-store the oh BIN as well. This is
                          * inefficient but we don't know of a simple way to
                          * optimize.
                          */
                         final BIN bin = (BIN) in;
                         final LN retLn = (LN) ret;
                         ohCache.storePreloadedLN(bin, index, retLn);
                         if (isOffHeapBinInTree) {
                             assert isLnInOffHeapBin;
                             ohCache.storePreloadedBIN(
                                 bin, ohBinParent, ohBinIndex);
                         }
                     } else {
                         /*
                          * Store full BIN off-heap. Note that setLastLoggedLSN
                          * is normally called by postFetchInit or postLoadInit,
                          * but neither is used during preload so we must call
                          * setLastLoggedLsn here.
                          *
                          * Use lastLoggedLsn rather than lsn: when a delta was
                          * applied above, lsn is the full BIN's LSN while the
                          * last logged version (and the parent slot's LSN) is
                          * the delta's LSN. This matches the LSN passed to
                          * postFetchInit when attaching to the main cache.
                          */
                         assert !isLnInOffHeapBin;
                         final BIN retBin = (BIN) ret;
                         retBin.latchNoUpdateLRU(dbImpl);
                         try {
                             retBin.setLastLoggedLsn(lastLoggedLsn);
                             if (!ohCache.storePreloadedBIN(
                                 retBin, in, index)) {
                                 return null; // could not allocate memory
                             }
                         } finally {
                             retBin.releaseLatch();
                         }
                         retOhBinParent = in;
                         retOhBinIndex = index;
                     }
                 } else {
                     /* Attach node to the Btree as in a normal operation. */
                     if (ret.isIN()) {
                         final IN retIn = (IN) ret;
                         retIn.latchNoUpdateLRU(dbImpl);
                         try {
                             ret.postFetchInit(dbImpl, lastLoggedLsn);
                             in.attachNode(index, ret, lnKey);
                         } finally {
                             /* Don't leak the latch if attaching fails. */
                             retIn.releaseLatch();
                         }
                     } else {
                         ret.postFetchInit(dbImpl, lastLoggedLsn);
                         in.attachNode(index, ret, lnKey);
                     }

                     /* BINs with resident LNs shouldn't be in the dirty LRU. */
                     if (in.isBIN()) {
                         final CacheMode mode =
                             in.getDatabase().getDefaultCacheMode();

                         if (mode != CacheMode.EVICT_LN) {
                             envImpl.getEvictor().moveToPri1LRU(in);
                         }
                     }
                 }

                 /*
                  * Clear the fetched-cold flag set, since we want the preloaded
                  * data to be "hot". This is necessary because the node is not
                  * latched after being preloaded, as it normally would be after
                  * being attached.
                  */
                 if (ret.isIN()) {
                     ((IN) ret).setFetchedCold(false);
                 } else if (ret.isLN()) {
                     ((LN) ret).setFetchedCold(false);
                 }
             }

             return new FetchResult(
                 ret, lastLoggedSize, retOhBinParent, retOhBinIndex);

         } finally {
             if (residentNode != null) {
                 residentNode.releaseLatch();
             }
             if (in1ToUnlatch != null) {
                 in1ToUnlatch.releaseLatch();
             }
             if (in2ToUnlatch != null) {
                 in2ToUnlatch.releaseLatch();
             }
         }
     }

     /**
      * Overridden by subclasses if fetch of an LSN should result in insertion
      * into tree rather than just instantiating the target.
      *
      * @return false in this base implementation.
      */
     protected boolean fetchAndInsertIntoTree() {
         return false;
     }
 }