| /*- |
| * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved. |
| * |
| * This file was distributed by Oracle as part of a version of Oracle Berkeley |
| * DB Java Edition made available at: |
| * |
| * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html |
| * |
| * Please see the LICENSE file included in the top-level directory of the |
| * appropriate version of Oracle Berkeley DB Java Edition for a copy of the |
| * license and additional information. |
| */ |
| |
| package com.sleepycat.je.dbi; |
| |
| import java.io.FileNotFoundException; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import com.sleepycat.je.CacheMode; |
| import com.sleepycat.je.DatabaseEntry; |
| import com.sleepycat.je.DatabaseException; |
| import com.sleepycat.je.EnvironmentFailureException; |
| import com.sleepycat.je.evictor.OffHeapCache; |
| import com.sleepycat.je.log.ErasedException; |
| import com.sleepycat.je.log.LogEntryType; |
| import com.sleepycat.je.log.LogManager; |
| import com.sleepycat.je.log.WholeEntry; |
| import com.sleepycat.je.log.entry.BINDeltaLogEntry; |
| import com.sleepycat.je.log.entry.LNLogEntry; |
| import com.sleepycat.je.log.entry.LogEntry; |
| import com.sleepycat.je.log.entry.OldBINDeltaLogEntry; |
| import com.sleepycat.je.tree.BIN; |
| import com.sleepycat.je.tree.IN; |
| import com.sleepycat.je.tree.LN; |
| import com.sleepycat.je.tree.Node; |
| import com.sleepycat.je.tree.OldBINDelta; |
| import com.sleepycat.je.utilint.DbLsn; |
| import com.sleepycat.je.utilint.SizeofMarker; |
| |
| /** |
| * SortedLSNTreeWalker uses ordered disk access rather than random access to |
| * iterate over a database tree. Faulting in data records by on-disk order can |
| * provide much improved performance over faulting in by key order, since the |
| * latter may require random access. SortedLSN walking does not obey cursor |
| * and locking constraints, and therefore can only be guaranteed consistent for |
| * a quiescent tree which is not being modified by user or daemon threads. |
| * |
| * The class walks over the tree using sorted LSN fetching for parts of the |
| * tree that are not in memory. It returns LSNs for each node in the tree, |
| * <b>except</b> the root IN, in an arbitrary order (i.e. not key |
| * order). The caller is responsible for getting the root IN's LSN explicitly. |
| * <p> |
| * A callback function specified in the constructor is executed for each LSN |
| * found. |
| * <p> |
| * The walker works in two phases. The first phase is to gather and return all |
| * the resident INs using the roots that were specified when the SLTW was |
| * constructed. For each child of each root, if the child is resident it is |
| * passed to the callback method (processLSN). If the child was not in memory, |
| * it is added to a list of LSNs to read. When all of the in-memory INs have |
| * been passed to the callback for all LSNs collected, phase 1 is complete. |
| * <p> |
| * In phase 2, for each of the sorted LSNs, the target is fetched, the type |
| * determined, and the LSN and type passed to the callback method for |
| * processing. LSNs of the children of those nodes are retrieved and the |
| * process repeated until there are no more nodes to be fetched for this |
| * database's tree. LSNs are accumulated in batches in this phase so that |
| * memory consumption is not excessive. For instance, if batches were not used |
| * then the LSNs of all of the BINs would need to be held in memory. |
| */ |
| public class SortedLSNTreeWalker { |
| |
| /* |
| * The interface for calling back to the user with each LSN. |
| */ |
| public interface TreeNodeProcessor { |
| void processLSN(long childLSN, |
| LogEntryType childType, |
| Node theNode, |
| byte[] lnKey, |
| int lastLoggedSize, |
| boolean isEmbedded) |
| throws FileNotFoundException; |
| |
| /* Used for processing dirty (unlogged) deferred write LNs. [#15365] */ |
| void processDirtyDeletedLN(long childLSN, LN ln, byte[] lnKey); |
| |
| /* Called when the internal memory limit is exceeded. */ |
| void noteMemoryExceeded(); |
| } |
| |
| /* |
| * Optionally passed to the SortedLSNTreeWalker to be called when an |
| * exception occurs. |
| */ |
| interface ExceptionPredicate { |
| /* Return true if the exception can be ignored. */ |
| boolean ignoreException(Exception e); |
| } |
| |
| final DatabaseImpl[] dbImpls; |
| protected final EnvironmentImpl envImpl; |
| |
| /* |
| * Save the root LSN at construction time, because the root may be |
| * nulled out before walk() executes. |
| */ |
| private final long[] rootLsns; |
| |
| /* The limit on memory to be used for internal structures during SLTW. */ |
| private long internalMemoryLimit = Long.MAX_VALUE; |
| |
| /* The current memory usage by internal SLTW structures. */ |
| private long internalMemoryUsage; |
| |
| private final TreeNodeProcessor callback; |
| |
| /* |
| * If true, then walker should fetch LNs and pass them to the |
| * TreeNodeProcessor callback method. Even if true, dup LNs are not |
| * fetched because they are normally never used (see accumulateDupLNs). |
| */ |
| public boolean accumulateLNs = false; |
| |
| boolean preloadIntoOffHeapCache = false; |
| |
| /* |
| * If true, fetch LNs in a dup DB. Since LNs in a dup DB are not used by |
| * cursor operations, fetching dup LNs should only be needed in very |
| * exceptional situations. Currently this field is never set to true. |
| */ |
| boolean accumulateDupLNs = false; |
| |
| /* |
| * If non-null, save any exceptions encountered while traversing nodes into |
| * this savedException list, in order to walk as much of the tree as |
| * possible. The caller of the tree walker will handle the exceptions. |
| */ |
| private final List<DatabaseException> savedExceptions; |
| |
| private final ExceptionPredicate excPredicate; |
| |
| /* |
| * The batch size of LSNs which will be sorted. |
| */ |
| private long lsnBatchSize = Long.MAX_VALUE; |
| |
| /* Holder for returning LN key from fetchLSN. */ |
| private final DatabaseEntry lnKeyEntry = new DatabaseEntry(); |
| |
| /* |
| * This map provides an LSN to IN/index. When an LSN is processed by the |
| * tree walker, the map is used to lookup the parent IN and child entry |
| * index of each LSN processed by the tree walker. Since fetchLSN is |
| * called with an arbitrary LSN, and since when we fetch (for preload) we |
| * need to setup the parent to refer to the node which we are prefetching, |
| * we need to have the parent in hand at the time of the call to fetchLSN. |
| * This map allows us to keep a reference to that parent so that we can |
| * call fetchNode on that parent. |
| * |
| * It is also necessary to maintain this map for cases other than preload() |
| * so that during multi-db walks (i.e. multi db preload), we can associate |
| * an arbitrary LSN back to the parent IN and therefore connect a fetch'ed |
| * Node into the proper place in the tree. |
| * |
| * LSN -> INEntry |
| */ |
| /* struct to hold IN/entry-index pair. */ |
| public static class INEntry { |
| final IN in; |
| final int index; |
| |
| INEntry(IN in, int index) { |
| assert in != null; |
| assert in.getDatabase() != null; |
| this.in = in; |
| this.index = index; |
| } |
| |
| public INEntry(@SuppressWarnings("unused") SizeofMarker marker) { |
| this.in = null; |
| this.index = 0; |
| } |
| |
| Object getDelta() { |
| return null; |
| } |
| |
| long getDeltaLsn() { |
| return DbLsn.NULL_LSN; |
| } |
| |
| long getMemorySize() { |
| return MemoryBudget.HASHMAP_ENTRY_OVERHEAD + |
| MemoryBudget.INENTRY_OVERHEAD; |
| } |
| } |
| |
| /** |
| * Supplements INEntry with BIN-delta information. When a BIN-delta is |
| * encountered during the fetching process, we cannot immediately place it |
| * in the tree. Instead we queue a DeltaINEntry for fetching the full BIN, |
| * in LSN order as usual. When the full BIN is fetched, the DeltaINEntry |
| * is used to apply the delta and place the result in the tree. |
| */ |
| public static class DeltaINEntry extends INEntry { |
| private final Object delta; |
| private final long deltaLsn; |
| |
| DeltaINEntry(IN in, int index, Object delta, long deltaLsn) { |
| super(in, index); |
| assert (delta != null); |
| assert (deltaLsn != DbLsn.NULL_LSN); |
| this.delta = delta; |
| this.deltaLsn = deltaLsn; |
| } |
| |
| public DeltaINEntry(@SuppressWarnings("unused") SizeofMarker marker) { |
| super(marker); |
| this.delta = null; |
| this.deltaLsn = 0; |
| } |
| |
| @Override |
| Object getDelta() { |
| return delta; |
| } |
| |
| @Override |
| long getDeltaLsn() { |
| return deltaLsn; |
| } |
| |
| @Override |
| long getMemorySize() { |
| final long deltaSize; |
| if (delta instanceof OldBINDelta) { |
| deltaSize = ((OldBINDelta) delta).getMemorySize(); |
| } else { |
| deltaSize = ((BIN) delta).getInMemorySize(); |
| } |
| return deltaSize + |
| MemoryBudget.HASHMAP_ENTRY_OVERHEAD + |
| MemoryBudget.DELTAINENTRY_OVERHEAD; |
| } |
| } |
| |
| private final Map<Long, INEntry> lsnINMap = new HashMap<>(); |
| |
| /* |
| * @param dbImpls an array of DatabaseImpls which should be walked over |
| * in disk order. This array must be parallel to the rootLsns array in |
| * that rootLsns[i] must be the root LSN for dbImpls[i]. |
| * |
| * @param rootLsns is passed in addition to the dbImpls, because the |
| * root may be nulled out on the dbImpl before walk() is called. |
| * |
| * @param callback the callback instance |
| * |
| * @param savedExceptions a List of DatabaseExceptions encountered during |
| * the tree walk. |
| * |
| * @param excPredicate a predicate to determine whether a given exception |
| * should be ignored. |
| */ |
| public SortedLSNTreeWalker(DatabaseImpl[] dbImpls, |
| long[] rootLsns, |
| TreeNodeProcessor callback, |
| List<DatabaseException> savedExceptions, |
| ExceptionPredicate excPredicate) { |
| |
| if (dbImpls == null || dbImpls.length < 1) { |
| throw EnvironmentFailureException.unexpectedState |
| ("DatabaseImpls array is null or 0-length for " + |
| "SortedLSNTreeWalker"); |
| } |
| |
| this.dbImpls = dbImpls; |
| this.envImpl = dbImpls[0].getEnv(); |
| /* Make sure all databases are from the same environment. */ |
| for (DatabaseImpl di : dbImpls) { |
| EnvironmentImpl ei = di.getEnv(); |
| if (ei == null) { |
| throw EnvironmentFailureException.unexpectedState |
| ("environmentImpl is null for target db " + |
| di.getName()); |
| } |
| |
| if (ei != this.envImpl) { |
| throw new IllegalArgumentException |
| ("Environment.preload() must be called with Databases " + |
| "which are all in the same Environment. (" + |
| di.getName() + ")"); |
| } |
| } |
| |
| this.rootLsns = rootLsns; |
| this.callback = callback; |
| this.savedExceptions = savedExceptions; |
| this.excPredicate = excPredicate; |
| } |
| |
| void setLSNBatchSize(long lsnBatchSize) { |
| this.lsnBatchSize = lsnBatchSize; |
| } |
| |
| public void setInternalMemoryLimit(long internalMemoryLimit) { |
| this.internalMemoryLimit = internalMemoryLimit; |
| } |
| |
| private void incInternalMemoryUsage(long increment) { |
| internalMemoryUsage += increment; |
| } |
| |
| private LSNAccumulator createLSNAccumulator() { |
| return new LSNAccumulator() { |
| @Override |
| void noteMemUsage(long increment) { |
| incInternalMemoryUsage(increment); |
| } |
| }; |
| } |
| |
| /** |
| * Find all non-resident nodes, and execute the callback. The root IN's |
| * LSN is not returned to the callback. |
| */ |
| public void walk() { |
| walkInternal(); |
| } |
| |
| void walkInternal() { |
| |
| /* |
| * Phase 1: seed the SLTW with all of the roots of the DatabaseImpl[]. |
| * For each root, look for all in-memory child nodes and process them |
| * (i.e. invoke the callback on those LSNs). For child nodes which are |
| * not in-memory (i.e. they are LSNs only and no Node references), |
| * accumulate their LSNs to be later sorted and processed during phase |
| * 2. |
| */ |
| LSNAccumulator pendingLSNs = createLSNAccumulator(); |
| for (int i = 0; i < dbImpls.length; i += 1) { |
| processRootLSN(dbImpls[i], pendingLSNs, rootLsns[i]); |
| } |
| |
| /* |
| * Phase 2: Sort and process any LSNs we've gathered so far. For each |
| * LSN, fetch the target record and process it as in Phase 1 (i.e. |
| * in-memory children get passed to the callback, not in-memory children |
| * have their LSN accumulated for later sorting, fetching, and |
| * processing. |
| */ |
| processAccumulatedLSNs(pendingLSNs); |
| } |
| |
| /* |
| * Retrieve the root for the given DatabaseImpl and then process its |
| * children. |
| */ |
| private void processRootLSN(DatabaseImpl dbImpl, |
| LSNAccumulator pendingLSNs, |
| long rootLsn) { |
| IN root = getOrFetchRootIN(dbImpl, rootLsn); |
| if (root != null) { |
| try { |
| accumulateLSNs(root, pendingLSNs, null, -1); |
| } finally { |
| releaseRootIN(root); |
| } |
| } |
| } |
| |
| /* |
| * Traverse the in-memory tree rooted at "parent". For each visited node N |
| * call the callback method on N and put in pendingLSNs the LSNs of N's |
| * non-resident children. |
| * |
| * On entering this method, parent is latched and remains latched on exit. |
| */ |
| private void accumulateLSNs(final IN parent, |
| final LSNAccumulator pendingLSNs, |
| final IN ohBinParent, |
| final int ohBinIndex) { |
| envImpl.checkOpen(); |
| |
| final DatabaseImpl db = parent.getDatabase(); |
| final boolean dups = db.getSortedDuplicates(); |
| |
| /* |
| * Without dups, all BINs contain only LN children. With dups, it |
| * depends on the dup format. Preload works with the old dup format |
| * and the new. |
| * |
| * In the new dup format (or after dup conversion), BINs contain only |
| * LNs and no DBINs exist. In the old dup format, DBINs contain only |
| * LN children, but BINs may contain a mix of LNs and DINs. |
| */ |
| final boolean allChildrenAreLNs; |
| if (!dups || db.getDupsConverted()) { |
| allChildrenAreLNs = parent.isBIN(); |
| } else { |
| allChildrenAreLNs = parent.isBIN() && parent.containsDuplicates(); |
| } |
| |
| /* |
| * If LNs are not needed, there is no need to accumulate the child LSNs |
| * when all children are LNs. |
| */ |
| final boolean accumulateChildren = |
| !allChildrenAreLNs || (dups ? accumulateDupLNs : accumulateLNs); |
| |
| final BIN parentBin = parent.isBIN() ? ((BIN) parent) : null; |
| final OffHeapCache ohCache = envImpl.getOffHeapCache(); |
| |
| /* |
| * Process all children, but only accumulate LSNs for children that are |
| * not in memory. |
| */ |
| for (int i = 0; i < parent.getNEntries(); i += 1) { |
| |
| final long lsn = parent.getLsn(i); |
| Node child = parent.getTarget(i); |
| final boolean childCached = child != null; |
| final boolean isEmbedded = parent.isEmbeddedLN(i); |
| |
| final byte[] lnKey = |
| (allChildrenAreLNs || (childCached && child.isLN())) ? |
| parent.getKey(i) : null; |
| |
| if (parentBin != null && parentBin.isDefunct(i)) { |
| |
| /* Dirty LNs (deferred write) get special treatment. */ |
| processDirtyLN(child, lsn, lnKey); |
| /* continue; */ |
| |
| } else if (!childCached && |
| parentBin != null && |
| parentBin.getOffHeapLNId(i) != 0) { |
| |
| /* Embedded LNs are not stored off-heap */ |
| assert !isEmbedded; |
| |
| child = ohCache.loadLN(parentBin, i, CacheMode.UNCHANGED); |
| assert child != null; |
| |
| processChild( |
| lsn, child, lnKey, parent.getLastLoggedSize(i), |
| false /*isEmbedded*/, pendingLSNs, null, -1); |
| |
| } else if (!childCached && parent.getOffHeapBINId(i) >= 0) { |
| |
| /* Embedded LNs are not stored off-heap */ |
| assert !isEmbedded; |
| |
| child = ohCache.materializeBIN( |
| envImpl, ohCache.getBINBytes(parent, i)); |
| |
| final BIN bin = (BIN) child; |
| bin.latchNoUpdateLRU(db); |
| boolean isLatched = true; |
| |
| try { |
| if (bin.isBINDelta()) { |
| |
| /* Deltas not allowed with deferred-write. */ |
| assert (lsn != DbLsn.NULL_LSN); |
| |
| /* |
| * Storing an off-heap reference would use less memory, |
| * but we prefer to optimize in the future by |
| * re-implementing preload. |
| */ |
| final long fullLsn = bin.getLastFullLsn(); |
| assert fullLsn != DbLsn.NULL_LSN; |
| pendingLSNs.add(fullLsn); |
| addToLsnINMap(fullLsn, parent, i, bin, lsn); |
| |
| } else { |
| |
| bin.releaseLatch(); |
| isLatched = false; |
| |
| processChild( |
| lsn, bin, lnKey, parent.getLastLoggedSize(i), |
| false /*isEmbedded*/, pendingLSNs, parent, i); |
| } |
| } finally { |
| if (isLatched) { |
| bin.releaseLatch(); |
| } |
| } |
| |
| } else if (accumulateChildren && |
| !childCached && |
| lsn != DbLsn.NULL_LSN) { |
| |
| /* |
| * Child is not in cache. Put its LSN in the current batch of |
| * LSNs to be sorted and fetched in phase 2. But don't do |
| * this if the child is an embedded LN. |
| */ |
| if (!isEmbedded) { |
| pendingLSNs.add(lsn); |
| if (ohBinParent != null) { |
| addToLsnINMap(lsn, ohBinParent, ohBinIndex); |
| } else { |
| addToLsnINMap(lsn, parent, i); |
| } |
| } else { |
| processChild( |
| DbLsn.NULL_LSN, null /*child*/, lnKey, |
| 0 /*lastLoggedSize*/, true /*isEmbedded*/, |
| pendingLSNs, null, -1); |
| } |
| |
| } else if (childCached) { |
| |
| child.latchShared(); |
| boolean isLatched = true; |
| |
| try { |
| if (child.isBINDelta()) { |
| |
| /* Deltas not allowed with deferred-write. */ |
| assert (lsn != DbLsn.NULL_LSN); |
| |
| final BIN delta = (BIN) child; |
| final long fullLsn = delta.getLastFullLsn(); |
| pendingLSNs.add(fullLsn); |
| addToLsnINMap(fullLsn, parent, i, delta, lsn); |
| |
| } else { |
| |
| child.releaseLatch(); |
| isLatched = false; |
| |
| processChild( |
| lsn, child, lnKey, parent.getLastLoggedSize(i), |
| isEmbedded, pendingLSNs, null, -1); |
| } |
| } finally { |
| if (isLatched) { |
| child.releaseLatch(); |
| } |
| } |
| |
| } else { |
| /* |
| * We are here because the child was not cached and was not |
| * accumulated either (because it was an LN and LN accumulation |
| * is turned off or its LSN was NULL). |
| */ |
| processChild( |
| lsn, null /*child*/, lnKey, parent.getLastLoggedSize(i), |
| isEmbedded, pendingLSNs, null, -1); |
| } |
| |
| /* |
| * If we've exceeded the batch size then process the current |
| * batch and start a new one. |
| */ |
| final boolean internalMemoryExceeded = |
| internalMemoryUsage > internalMemoryLimit; |
| |
| if (pendingLSNs.getNTotalEntries() > lsnBatchSize || |
| internalMemoryExceeded) { |
| if (internalMemoryExceeded) { |
| callback.noteMemoryExceeded(); |
| } |
| processAccumulatedLSNs(pendingLSNs); |
| pendingLSNs.clear(); |
| } |
| } |
| } |
| |
| private void processDirtyLN(Node node, long lsn, byte[] lnKey) { |
| if (node != null && node.isLN()) { |
| LN ln = (LN) node; |
| if (ln.isDirty()) { |
| callback.processDirtyDeletedLN(lsn, ln, lnKey); |
| } |
| } |
| } |
| |
| private void processChild( |
| final long lsn, |
| final Node child, |
| final byte[] lnKey, |
| final int lastLoggedSize, |
| final boolean isEmbedded, |
| final LSNAccumulator pendingLSNs, |
| final IN ohBinParent, |
| final int ohBinIndex) { |
| |
| final boolean childCached = (child != null); |
| |
| /* |
| * If the child is resident, use its log type, else it must be an LN. |
| */ |
| callProcessLSNHandleExceptions( |
| lsn, |
| (!childCached ? |
| LogEntryType.LOG_INS_LN /* Any LN type will do */ : |
| child.getGenericLogType()), |
| child, lnKey, lastLoggedSize, isEmbedded); |
| |
| if (childCached && child.isIN()) { |
| final IN nodeAsIN = (IN) child; |
| try { |
| nodeAsIN.latch(CacheMode.UNCHANGED); |
| accumulateLSNs(nodeAsIN, pendingLSNs, ohBinParent, ohBinIndex); |
| } finally { |
| nodeAsIN.releaseLatch(); |
| } |
| } |
| } |
| |
| /* |
| * Process a batch of LSNs by sorting and fetching each of them. |
| */ |
| private void processAccumulatedLSNs(LSNAccumulator pendingLSNs) { |
| |
| while (!pendingLSNs.isEmpty()) { |
| final long[] currentLSNs = pendingLSNs.getAndSortPendingLSNs(); |
| pendingLSNs = createLSNAccumulator(); |
| for (long lsn : currentLSNs) { |
| fetchAndProcessLSN(lsn, pendingLSNs); |
| } |
| } |
| } |
| |
| /* |
| * Fetch the node at 'lsn' and callback to let the invoker process it. If |
| * it is an IN, accumulate LSNs for it. |
| */ |
| private void fetchAndProcessLSN(long lsn, LSNAccumulator pendingLSNs) { |
| |
| lnKeyEntry.setData(null); |
| |
| final FetchResult result = fetchLSNHandleExceptions( |
| lsn, lnKeyEntry, pendingLSNs); |
| |
| if (result == null) { |
| return; |
| } |
| |
| final boolean isIN = result.node.isIN(); |
| final IN in; |
| if (isIN) { |
| in = (IN) result.node; |
| in.latch(CacheMode.UNCHANGED); |
| } else { |
| in = null; |
| } |
| |
| try { |
| callProcessLSNHandleExceptions( |
| lsn, result.node.getGenericLogType(), result.node, |
| lnKeyEntry.getData(), result.lastLoggedSize, |
| false /*isEmbedded*/); |
| |
| if (isIN) { |
| accumulateLSNs( |
| in, pendingLSNs, result.ohBinParent, result.ohBinIndex); |
| } |
| } finally { |
| if (isIN) { |
| in.releaseLatch(); |
| } |
| } |
| } |
| |
| private FetchResult fetchLSNHandleExceptions( |
| long lsn, |
| DatabaseEntry lnKeyEntry, |
| LSNAccumulator pendingLSNs) { |
| |
| DatabaseException dbe = null; |
| |
| try { |
| return fetchLSN(lsn, lnKeyEntry, pendingLSNs); |
| |
| } catch (DatabaseException e) { |
| if (excPredicate == null || |
| !excPredicate.ignoreException(e)) { |
| dbe = e; |
| } |
| } |
| |
| if (dbe != null) { |
| if (savedExceptions != null) { |
| |
| /* |
| * This LSN fetch hit a failure. Do as much of the rest of |
| * the tree as possible. |
| */ |
| savedExceptions.add(dbe); |
| } else { |
| throw dbe; |
| } |
| } |
| |
| return null; |
| } |
| |
| private void callProcessLSNHandleExceptions(long childLSN, |
| LogEntryType childType, |
| Node theNode, |
| byte[] lnKey, |
| int lastLoggedSize, |
| boolean isEmbedded) { |
| DatabaseException dbe = null; |
| |
| try { |
| callback.processLSN( |
| childLSN, childType, theNode, lnKey, lastLoggedSize, |
| isEmbedded); |
| |
| } catch (FileNotFoundException e) { |
| if (excPredicate == null || |
| !excPredicate.ignoreException(e)) { |
| dbe = new EnvironmentFailureException( |
| envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e); |
| } |
| |
| } catch (DatabaseException e) { |
| if (excPredicate == null || |
| !excPredicate.ignoreException(e)) { |
| dbe = e; |
| } |
| } |
| |
| if (dbe != null) { |
| if (savedExceptions != null) { |
| |
| /* |
| * This LSN fetch hit a failure. Do as much of the rest of |
| * the tree as possible. |
| */ |
| savedExceptions.add(dbe); |
| } else { |
| throw dbe; |
| } |
| } |
| } |
| |
| /** |
| * Returns the root IN, latched shared. Allows subclasses to override |
| * getResidentRootIN and/or getRootIN to modify behavior. |
| * getResidentRootIN is called first, |
| */ |
| private IN getOrFetchRootIN(DatabaseImpl dbImpl, long rootLsn) { |
| final IN root = getResidentRootIN(dbImpl); |
| if (root != null) { |
| return root; |
| } |
| if (rootLsn == DbLsn.NULL_LSN) { |
| return null; |
| } |
| return getRootIN(dbImpl, rootLsn); |
| } |
| |
| /** |
| * The default behavior fetches the rootIN from the log and latches it |
| * shared. Classes extending this may fetch (and latch) the root from the |
| * tree. |
| */ |
| IN getRootIN(DatabaseImpl dbImpl, long rootLsn) { |
| final IN root = (IN) |
| envImpl.getLogManager().getEntryHandleNotFound(rootLsn); |
| if (root == null) { |
| return null; |
| } |
| root.setDatabase(dbImpl); |
| root.latchShared(CacheMode.DEFAULT); |
| return root; |
| } |
| |
| /** |
| * The default behavior returns (and latches shared) the IN if it is |
| * resident in the Btree, or null otherwise. Classes extending this may |
| * return (and latch) a known IN object. |
| */ |
| public IN getResidentRootIN(DatabaseImpl dbImpl) { |
| return dbImpl.getTree().getResidentRootIN(true /*latched*/); |
| } |
| |
| /** |
| * Release the latch. Overriding this method should not be necessary. |
| */ |
| private void releaseRootIN(IN root) { |
| root.releaseLatch(); |
| } |
| |
| /** |
| * Add an LSN-IN/index entry to the map. |
| */ |
| private void addToLsnINMap(long lsn, IN in, int index) { |
| addEntryToLsnMap(lsn, new INEntry(in, index)); |
| } |
| |
| /** |
| * Add an LSN-IN/index entry, along with a delta and delta LSN, to the map. |
| */ |
| private void addToLsnINMap(long lsn, |
| IN in, |
| int index, |
| Object delta, |
| long deltaLsn) { |
| addEntryToLsnMap(lsn, new DeltaINEntry(in, index, delta, deltaLsn)); |
| } |
| |
| private void addEntryToLsnMap(long lsn, INEntry inEntry) { |
| if (lsnINMap.put(lsn, inEntry) == null) { |
| incInternalMemoryUsage(inEntry.getMemorySize()); |
| } |
| } |
| |
| private static class FetchResult { |
| final Node node; |
| final int lastLoggedSize; |
| final IN ohBinParent; |
| final int ohBinIndex; |
| |
| FetchResult(final Node node, |
| final int lastLoggedSize, |
| final IN ohBinParent, |
| final int ohBinIndex) { |
| this.node = node; |
| this.lastLoggedSize = lastLoggedSize; |
| this.ohBinParent = ohBinParent; |
| this.ohBinIndex = ohBinIndex; |
| } |
| } |
| |
| /* |
| * Process an LSN. Get & remove its INEntry from the map, then fetch the |
| * target at the INEntry's IN/index pair. This method will be called in |
| * sorted LSN order. |
| */ |
| private FetchResult fetchLSN( |
| long lsn, |
| DatabaseEntry lnKeyEntry, |
| LSNAccumulator pendingLSNs) { |
| |
| final LogManager logManager = envImpl.getLogManager(); |
| final OffHeapCache ohCache = envImpl.getOffHeapCache(); |
| |
| final INEntry inEntry = lsnINMap.remove(lsn); |
| assert (inEntry != null) : DbLsn.getNoFormatString(lsn); |
| |
| incInternalMemoryUsage(- inEntry.getMemorySize()); |
| |
| IN in = inEntry.in; |
| int index = inEntry.index; |
| |
| IN ohBinParent = null; |
| int ohBinIndex = -1; |
| |
| IN in1ToUnlatch = null; |
| IN in2ToUnlatch = null; |
| |
| if (!in.isLatchExclusiveOwner()) { |
| in.latch(); |
| in1ToUnlatch = in; |
| } |
| |
| final DatabaseImpl dbImpl = in.getDatabase(); |
| byte[] lnKey = null; |
| |
| Node residentNode = in.getTarget(index); |
| if (residentNode != null) { |
| residentNode.latch(); |
| } |
| |
| try { |
| /* |
| * When the indexed slot contains an off-heap BIN, the node to |
| * fetch is an LN within the off-heap BIN or the full BIN to merge |
| * with an off-heap BIN-delta. |
| */ |
| Object deltaObject = inEntry.getDelta(); |
| boolean isOffHeapBinInTree = in.getOffHeapBINId(index) >= 0; |
| boolean isLnInOffHeapBin = false; |
| |
| if (isOffHeapBinInTree && deltaObject == null) { |
| /* |
| * When fetching an LN within an off-heap BIN, materialize the |
| * parent BIN and set in/index to this true parent. |
| */ |
| isLnInOffHeapBin = true; |
| |
| final BIN ohBin = ohCache.materializeBIN( |
| envImpl, ohCache.getBINBytes(in, index)); |
| |
| int foundIndex = -1; |
| for (int i = 0; i < ohBin.getNEntries(); i += 1) { |
| if (ohBin.getLsn(i) == lsn) { |
| foundIndex = i; |
| break; |
| } |
| } |
| |
| if (foundIndex == -1) { |
| return null; // See note on concurrent activity below. |
| } |
| |
| ohBinParent = in; |
| ohBinIndex = index; |
| |
| in = ohBin; |
| index = foundIndex; |
| |
| in.latchNoUpdateLRU(dbImpl); |
| in2ToUnlatch = in; |
| } |
| |
| /* |
| * Concurrent activity (e.g., log cleaning) that was active before |
| * we took the root latch may have changed the state of a slot. |
| * Repeat check for LN deletion/expiration and check that the LSN |
| * has not changed. |
| */ |
| if (in.isBIN() && ((BIN) in).isDefunct(index)) { |
| return null; |
| } |
| |
| if (deltaObject == null) { |
| if (in.getLsn(index) != lsn) { |
| return null; |
| } |
| } else { |
| if (in.getLsn(index) != inEntry.getDeltaLsn()) { |
| return null; |
| } |
| } |
| |
| boolean mutateResidentDeltaToFullBIN = false; |
| |
| if (residentNode != null) { |
| /* |
| * If the resident node is not a delta then concurrent |
| * activity (e.g., log cleaning) must have loaded the node. |
| * Just return it and continue. |
| */ |
| if (!residentNode.isBINDelta()) { |
| if (residentNode.isLN()) { |
| lnKeyEntry.setData(in.getKey(index)); |
| } |
| return new FetchResult( |
| residentNode, in.getLastLoggedSize(index), null, -1); |
| } |
| |
| /* The resident node is a delta. */ |
| if (((BIN) residentNode).getLastFullLsn() != lsn) { |
| return null; // See note on concurrent activity above. |
| } |
| mutateResidentDeltaToFullBIN = true; |
| } |
| |
| /* Fetch log entry. */ |
| final WholeEntry wholeEntry; |
| try { |
| wholeEntry = logManager.getWholeLogEntry(lsn); |
| |
| } catch (FileNotFoundException|ErasedException e) { |
| final String msg = |
| (fetchAndInsertIntoTree() ? |
| "Preload failed" : |
| "SortedLSNTreeWalker failed") + |
| " dbId=" + dbImpl.getId() + |
| " isOffHeapBinInTree=" + isOffHeapBinInTree + |
| " isLnInOffHeapBin=" + isLnInOffHeapBin + |
| " deltaObject=" + (deltaObject != null) + |
| " residentNode=" + (residentNode != null); |
| |
| throw new EnvironmentFailureException( |
| envImpl, |
| (e instanceof FileNotFoundException) ? |
| EnvironmentFailureReason.LOG_FILE_NOT_FOUND : |
| EnvironmentFailureReason.LOG_CHECKSUM, |
| in.makeFetchErrorMsg(msg, lsn, index), e); |
| } |
| |
| final LogEntry entry = wholeEntry.getEntry(); |
| final int lastLoggedSize = wholeEntry.getHeader().getEntrySize(); |
| |
| /* |
| * For a BIN delta, queue fetching of the full BIN and combine the |
| * full BIN with the delta when it is processed later (see below). |
| * |
| * Note that for preload, this means that a BIN-delta is not placed |
| * in the tree when there is not enough memory for the full BIN. |
| * Ideally we should place the BIN-delta in the tree here. |
| */ |
| if (entry instanceof BINDeltaLogEntry) { |
| final BINDeltaLogEntry deltaEntry = (BINDeltaLogEntry) entry; |
| final long fullLsn = deltaEntry.getPrevFullLsn(); |
| final BIN delta = deltaEntry.getMainItem(); |
| pendingLSNs.add(fullLsn); |
| addToLsnINMap(fullLsn, in, index, delta, lsn); |
| return null; |
| } |
| |
| if (entry instanceof OldBINDeltaLogEntry) { |
| final OldBINDelta delta = (OldBINDelta) entry.getMainItem(); |
| final long fullLsn = delta.getLastFullLsn(); |
| pendingLSNs.add(fullLsn); |
| addToLsnINMap(fullLsn, in, index, delta, lsn); |
| return null; |
| } |
| |
| /* For an LNLogEntry, call postFetchInit and get the lnKey. */ |
| if (entry instanceof LNLogEntry) { |
| final LNLogEntry<?> lnEntry = (LNLogEntry<?>) entry; |
| lnEntry.postFetchInit(dbImpl); |
| lnKey = lnEntry.getKey(); |
| lnKeyEntry.setData(lnKey); |
| } |
| |
| /* Get the Node from the LogEntry. */ |
| final Node ret = (Node) entry.getResolvedItem(dbImpl); |
| |
| /* |
| * For an IN Node, set the database so it will be passed down to |
| * nested fetches. |
| */ |
| long lastLoggedLsn = lsn; |
| if (ret.isIN()) { |
| final IN retIn = (IN) ret; |
| retIn.setDatabase(dbImpl); |
| } |
| |
| /* |
| * If there is a delta, then this is the full BIN to which the |
| * delta must be applied. The delta LSN is the last logged LSN. |
| */ |
| if (mutateResidentDeltaToFullBIN) { |
| final BIN fullBIN = (BIN) ret; |
| BIN delta = (BIN) residentNode; |
| if (fetchAndInsertIntoTree()) { |
| delta.mutateToFullBIN(fullBIN, false /*leaveFreeSlot*/); |
| |
| return new FetchResult( |
| residentNode, lastLoggedSize, ohBinParent, ohBinIndex); |
| } else { |
| delta.reconstituteBIN( |
| dbImpl, fullBIN, false /*leaveFreeSlot*/); |
| |
| return new FetchResult( |
| ret, lastLoggedSize, ohBinParent, ohBinIndex); |
| } |
| } |
| |
| if (deltaObject != null) { |
| final BIN fullBIN = (BIN) ret; |
| |
| if (deltaObject instanceof OldBINDelta) { |
| final OldBINDelta delta = (OldBINDelta) deltaObject; |
| assert lsn == delta.getLastFullLsn(); |
| delta.reconstituteBIN(dbImpl, fullBIN); |
| lastLoggedLsn = inEntry.getDeltaLsn(); |
| } else { |
| final BIN delta = (BIN) deltaObject; |
| assert lsn == delta.getLastFullLsn(); |
| |
| delta.reconstituteBIN( |
| dbImpl, fullBIN, false /*leaveFreeSlot*/); |
| |
| lastLoggedLsn = inEntry.getDeltaLsn(); |
| } |
| } |
| |
| assert !ret.isBINDelta(false); |
| |
| /* |
| * When we store an off-heap BIN here, the caller must pass its |
| * parent/index to accumulateLSNs. |
| */ |
| IN retOhBinParent = null; |
| int retOhBinIndex = -1; |
| |
| /* During a preload, finally place the Node into the Tree. */ |
| if (fetchAndInsertIntoTree()) { |
| |
| /* Last logged size is not present before log version 9. */ |
| in.setLastLoggedSize(index, lastLoggedSize); |
| |
| /* |
| * We don't worry about the memory usage being kept below the |
| * max by the evictor, since we keep the root INs latched. |
| */ |
| final MemoryBudget memBudget = envImpl.getMemoryBudget(); |
| final boolean storeOffHeap = |
| preloadIntoOffHeapCache && |
| memBudget.getCacheMemoryUsage() > memBudget.getMaxMemory(); |
| |
| /* |
| * Note that UINs are always stored in the main cache even if |
| * it is full. The idea is that LNs and BINs should be evicted |
| * from main to make room. When the main cache fills with UINs, |
| * and an off-heap cache is also being filled, we currently |
| * allow the main cache to overflow. |
| */ |
| if (isOffHeapBinInTree || (storeOffHeap && !ret.isUpperIN())) { |
| if (ret.isLN()) { |
| /* |
| * Store LN off-heap. If an oh LN was added to an oh |
| * BIN we must re-store the oh BIN as well. This is |
| * inefficient but we don't know of a simple way to |
| * optimize. |
| */ |
| final BIN bin = (BIN) in; |
| final LN retLn = (LN) ret; |
| ohCache.storePreloadedLN(bin, index, retLn); |
| if (isOffHeapBinInTree) { |
| assert isLnInOffHeapBin; |
| ohCache.storePreloadedBIN( |
| bin, ohBinParent, ohBinIndex); |
| } |
| } else { |
| /* |
| * Store full BIN off-heap. Note that setLastLoggedLSN |
| * is normally called by postFetchInit or postLoadInit, |
| * but neither is used during preload so we must call |
| * setLastLoggedLsn here. |
| */ |
| assert !isLnInOffHeapBin; |
| final BIN retBin = (BIN) ret; |
| retBin.latchNoUpdateLRU(dbImpl); |
| retBin.setLastLoggedLsn(lsn); |
| try { |
| if (!ohCache.storePreloadedBIN( |
| retBin, in, index)) { |
| return null; // could not allocate memory |
| } |
| } finally { |
| retBin.releaseLatch(); |
| } |
| retOhBinParent = in; |
| retOhBinIndex = index; |
| } |
| } else { |
| /* Attach node to the Btree as in a normal operation. */ |
| if (ret.isIN()) { |
| final IN retIn = (IN) ret; |
| retIn.latchNoUpdateLRU(dbImpl); |
| ret.postFetchInit(dbImpl, lastLoggedLsn); |
| in.attachNode(index, ret, lnKey); |
| retIn.releaseLatch(); |
| } else { |
| ret.postFetchInit(dbImpl, lastLoggedLsn); |
| in.attachNode(index, ret, lnKey); |
| } |
| |
| /* BINs with resident LNs shouldn't be in the dirty LRU. */ |
| if (in.isBIN()) { |
| final CacheMode mode = |
| in.getDatabase().getDefaultCacheMode(); |
| |
| if (mode != CacheMode.EVICT_LN) { |
| envImpl.getEvictor().moveToPri1LRU(in); |
| } |
| } |
| } |
| |
| /* |
| * Clear the fetched-cold flag set, since we want the preloaded |
| * data to be "hot". This is necessary because the node is not |
| * latched after being preloaded, as it normally would be after |
| * being attached. |
| */ |
| if (ret.isIN()) { |
| ((IN) ret).setFetchedCold(false); |
| } else if (ret.isLN()) { |
| ((LN) ret).setFetchedCold(false); |
| } |
| } |
| |
| return new FetchResult( |
| ret, lastLoggedSize, retOhBinParent, retOhBinIndex); |
| |
| } finally { |
| if (residentNode != null) { |
| residentNode.releaseLatch(); |
| } |
| if (in1ToUnlatch != null) { |
| in1ToUnlatch.releaseLatch(); |
| } |
| if (in2ToUnlatch != null) { |
| in2ToUnlatch.releaseLatch(); |
| } |
| } |
| } |
| |
| /* |
| * Overriden by subclasses if fetch of an LSN should result in insertion |
| * into tree rather than just instantiating the target. |
| */ |
| protected boolean fetchAndInsertIntoTree() { |
| return false; |
| } |
| } |