blob: 32a98cba4ac8e7ea588af89a4b708b12718f521a [file] [log] [blame]
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.rep.vlsn;
import static com.sleepycat.je.utilint.VLSN.NULL_VLSN;
import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.util.SortedMap;
import java.util.TreeMap;
import com.sleepycat.bind.tuple.LongBinding;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.CursorConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.cleaner.DataEraser;
import com.sleepycat.je.cleaner.FileProtector;
import com.sleepycat.je.cleaner.FileProtector.ProtectedFileSet;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.rep.impl.RepParams;
import com.sleepycat.je.rep.vlsn.VLSNRange.VLSNRangeBinding;
import com.sleepycat.je.txn.BasicLocker;
import com.sleepycat.je.txn.Locker;
import com.sleepycat.je.txn.Txn;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LongStat;
import com.sleepycat.je.utilint.Pair;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.VLSN;
/**
* See @link{VLSNIndex} for an overview of the mapping system. The VLSNTracker
* packages the VLSNRange and the cached, in-memory VLSNBuckets.
*
* The tracker has a notion of the "currentBucket", which is the one receiving
* updates. All other cached buckets are finished and are awaiting a write to
* the database. Those finished buckets will only be updated in special
* circumstances, such as log cleaning or replication stream truncation, when
* we can assume that there will no readers viewing the buckets.
*/
class VLSNTracker {
private final EnvironmentImpl envImpl;
/* The first mapping that is is the tracker cache. */
private VLSN firstTrackedVLSN = NULL_VLSN;
/* The last VLSN that is stored on disk. */
private VLSN lastOnDiskVLSN = NULL_VLSN;
/*
* A cache of buckets that are not on disk. The map key is the bucket's
* first VLSN.
*/
SortedMap<Long, VLSNBucket> bucketCache;
/*
* The range should always be updated through assignment to a new
* VLSNRange, to ensure that the values stay consistent. The current bucket
* mutex must be taken in order to update this value, so that the current
* bucket is updated before the range is updated. When reading range
* fields, the caller must be sure to get a reference to a single range
* instance, so that there's no danger of getting inconsistent values.
*/
protected volatile VLSNRange range;
private boolean rangeTruncated;
/*
* ProtectedFileRange protects files in 'range' from being deleted. The
* range start is changed during initialization and when the head of the
* index is truncated. It is changed while synchronized on 'this' to
* guarantees that files are not deleted while the range head is locked.
*/
private final FileProtector.ProtectedFileRange protectedFileRange;
/*
* In the future, we may want to vary stride, maxMappings and maxDistance
* dynamically, in reaction to efficient the mappings look.
*/
private final int stride;
private final int maxMappings;
private final int maxDistance;
private final LongStat nBucketsCreated;
/*
* Create an VLSNTracker, with the range initialized from the mapping db.
*/
VLSNTracker(EnvironmentImpl envImpl,
DatabaseImpl mappingDbImpl,
int stride,
int maxMappings,
int maxDistance,
StatGroup statistics)
throws DatabaseException {
this.stride = stride;
this.maxMappings = maxMappings;
this.maxDistance = maxDistance;
this.envImpl = envImpl;
nBucketsCreated =
new LongStat(statistics, VLSNIndexStatDefinition.N_BUCKETS_CREATED);
bucketCache = new TreeMap<>();
/*
* Protect all files initially. The range lower bound will be advanced
* later during initialization. This special ProtectedFileRange does
* not impact LogSizeStats -- see FileProtector.
*/
final FileProtector fileProtector = envImpl.getFileProtector();
protectedFileRange = fileProtector.protectFileRange(
FileProtector.VLSN_INDEX_NAME, 0 /*rangeStart*/,
true /*protectVlsnIndex*/, false /*protectBarrenFiles*/);
fileProtector.setVLSNIndexProtectedFileRange(protectedFileRange);
/* Read the current range information. */
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
LongBinding.longToEntry(VLSNRange.RANGE_KEY, key);
Cursor cursor = null;
Locker locker = null;
try {
locker = BasicLocker.createBasicLocker(envImpl);
cursor = DbInternal.makeCursor(mappingDbImpl,
locker,
CursorConfig.DEFAULT);
DbInternal.getCursorImpl(cursor).setAllowEviction(false);
OperationStatus status = cursor.getSearchKey(key, data,
LockMode.DEFAULT);
if (status == OperationStatus.SUCCESS) {
/* initialize the range from the database. */
VLSNRangeBinding rangeBinding = new VLSNRangeBinding();
range = rangeBinding.entryToObject(data);
lastOnDiskVLSN = range.getLast();
} else if (status == OperationStatus.NOTFOUND) {
/* No mappings exist before. */
range = VLSNRange.EMPTY;
} else {
throw EnvironmentFailureException.unexpectedState
("VLSNTracker init: status=" + status);
}
} finally {
if (cursor != null) {
cursor.close();
}
if (locker != null) {
locker.operationEnd(true);
}
}
}
/*
* Create an empty VLSNTracker. Used during recovery.
*/
VLSNTracker(EnvironmentImpl envImpl,
int stride,
int maxMappings,
int maxDistance) {
this.envImpl = envImpl;
this.stride = stride;
this.maxMappings = maxMappings;
this.maxDistance = maxDistance;
this.protectedFileRange = null;
/* Set up a temporary stat group for use during recovery */
StatGroup statistics =
new StatGroup(VLSNIndexStatDefinition.GROUP_NAME,
VLSNIndexStatDefinition.GROUP_DESC);
nBucketsCreated =
new LongStat(statistics,
VLSNIndexStatDefinition.N_BUCKETS_CREATED);
initEmpty();
}
void initEmpty() {
bucketCache = new TreeMap<>();
range = VLSNRange.EMPTY;
}
/**
* Return a bucket for reading a mapping for this VLSN. If vlsn is >
* lastOnDisk, a bucket is guaranteed to be returned.
*/
synchronized VLSNBucket getGTEBucket(VLSN vlsn) {
if (lastOnDiskVLSN.compareTo(vlsn) >= 0) {
/* The bucket is in the database, not the cache. */
return null;
}
Long pivot = vlsn.getSequence() + 1;
SortedMap<Long, VLSNBucket> head = bucketCache.headMap(pivot);
VLSNBucket prevBucket = null;
if (head.size() > 0) {
prevBucket = head.get(head.lastKey());
if (prevBucket.owns(vlsn)) {
return prevBucket;
}
}
/*
* If the key is not in the headMap, we must return the next bucket
* with mappings that follow on.
*/
SortedMap<Long, VLSNBucket> tail = bucketCache.tailMap(pivot);
if (tail.size() > 0) {
VLSNBucket bucket = tail.get(tail.firstKey());
assert (bucket.owns(vlsn) || bucket.follows(vlsn)) :
"VLSN " + vlsn + " got wrong bucket " + bucket;
return bucket;
}
throw EnvironmentFailureException.unexpectedState
(envImpl, "VLSN " + vlsn + " should be held within this tracker. " +
this + " prevBucket=" + prevBucket);
}
/**
* Get the bucket which holds a mapping for this VLSN. If there is no such
* bucket, get the one directly preceding it. If this VLSN is >=
* firstTrackedVLSN, then we should be able to guarantee that a bucket is
* returned.
*/
synchronized VLSNBucket getLTEBucket(VLSN vlsn) {
if (firstTrackedVLSN.equals(NULL_VLSN) ||
(firstTrackedVLSN.compareTo(vlsn) > 0)) {
return null;
}
Long pivot = vlsn.getSequence() + 1;
SortedMap<Long, VLSNBucket> head = bucketCache.headMap(pivot);
if (head.size() > 0) {
return head.get(head.lastKey());
}
/*
* We shouldn't get here. Get the tail purely for creating a debugging
* message.
*/
SortedMap<Long, VLSNBucket> tail = bucketCache.tailMap(pivot);
VLSNBucket nextBucket = null;
if (tail.size() > 0) {
nextBucket = tail.get(tail.firstKey());
}
throw EnvironmentFailureException.unexpectedState
(envImpl, "VLSN " + vlsn + " should be held within this tracker. " +
this + " nextBucket=" + nextBucket);
}
/**
* Record a new VLSN->LSN mapping. We guarantee that the first and last
* VLSNs in the range have a mapping. If a VLSN comes out of order, we will
* discard it, and will not update the range.
*/
synchronized void track(VLSN vlsn, long lsn, byte entryTypeNum) {
VLSNBucket currentBucket;
if (vlsn.compareTo(lastOnDiskVLSN) < 0) {
/*
* This VLSN is a laggard. It belongs to a bucket that has already
* gone to disk. Since on disk buckets can't be modified, throw
* this mapping away. Do be sure to update the range in case
* lastSync or lastTxnEnd should be updated as a result of this
* mapping.
*/
updateRange(vlsn, entryTypeNum);
return;
}
if (bucketCache.size() == 0) {
/* Nothing in the tracker, add a new bucket. */
currentBucket = new VLSNBucket(DbLsn.getFileNumber(lsn), stride,
maxMappings, maxDistance, vlsn);
nBucketsCreated.increment();
bucketCache.put(currentBucket.getFirst().getSequence(),
currentBucket);
} else {
/* Find the last bucket. Only the last bucket is updateable. */
currentBucket = bucketCache.get(bucketCache.lastKey());
}
/*
* This VLSN is a laggard that was preceded by an earlier mapping which
* came unseasonably early. This VLSN can't fit into the current
* bucket, and since we only want to update the last bucket, we'll
* throw this mapping away.
*/
if (currentBucket.follows(vlsn)) {
updateRange(vlsn, entryTypeNum);
return;
}
if (!currentBucket.put(vlsn, lsn)) {
/*
* Couldn't put the mapping in this bucket. Close it and move to a
* new one.
*/
currentBucket =
new VLSNBucket(DbLsn.getFileNumber(lsn), stride,
maxMappings, maxDistance, vlsn);
nBucketsCreated.increment();
bucketCache.put(currentBucket.getFirst().getSequence(),
currentBucket);
if (!currentBucket.put(vlsn, lsn)) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "Couldn't put VLSN " + vlsn + " into " +
currentBucket);
}
}
updateRange(vlsn, entryTypeNum);
/*
* Only update the firstTrackedVLSN if this mapping is really resident
* in a bucket. Don't update it if it's an out-of-order, skipped
* mapping.
*/
if (firstTrackedVLSN == NULL_VLSN) {
firstTrackedVLSN = vlsn;
}
}
/**
* Update the range with a newly arrived VLSN.
*/
private void updateRange(VLSN vlsn, byte entryTypeNum) {
/*
* Get a reference to the volatile range field so that we always see
* consistent values.
*/
VLSNRange currentRange = range;
range = currentRange.getUpdateForNewMapping(vlsn, entryTypeNum);
}
/**
* Flush the tracker cache to disk.
* Ideally, we'd allow concurrent VLSN put() calls while a flush to database
* is happening. To do that, we need a more sophisticated synchronization
* scheme that defines the immutable vs. mutable portion of the tracker
* cache. See SR [#17689]
*/
synchronized void flushToDatabase(DatabaseImpl mappingDbImpl, Txn txn) {
VLSNRange flushRange = range;
if (bucketCache.size() == 0) {
/*
* No mappings to write out, but it's possible that the range
* changed due to a truncation, in which case we must update the
* database. RangeTruncated is used to reduce the chance that
* we are doing unnecessary writes of the range record to the db.
*/
if (rangeTruncated) {
lastOnDiskVLSN = flushRange.writeToDatabase(envImpl,
mappingDbImpl, txn);
rangeTruncated = false;
}
return;
}
/*
* Save information about the portion of the cache that we are trying
* to flush. Close off the last bucket, so the flush portion becomes
* immutable. In the past, this was in a synchronization block.
* This isn't strictly needed right now, since the method is currently
* fully synchronized, but is a good practice, and will be
* necessary if future changes make it possible to have concurrent
* puts.
*/
VLSNBucket lastBucket = bucketCache.get(bucketCache.lastKey());
lastBucket.close();
/*
* Write the flush portion to disk. Ideally, this portion would be done
* without synchronization on the tracker, to permit concurrent reads
* and writes to the tracker. Note that because there is no
* synchronization, the buckets must be flushed to disk in vlsn order,
* so that the vlsn index always looks consistent. There should be no
* gaps in the bucket range. This assumption permits concurrent access
* by VLSNIndex.getGTEBucketFromDatabase.
*/
VLSN currentLastVLSN = lastOnDiskVLSN;
for (Long key : bucketCache.keySet()) {
VLSNBucket target = bucketCache.get(key);
/* Do some sanity checking before we write this bucket to disk. */
validateBeforeWrite(target,
flushRange,
currentLastVLSN,
target==lastBucket);
target.writeToDatabase(envImpl, mappingDbImpl, txn);
currentLastVLSN = target.getLast();
}
lastOnDiskVLSN = flushRange.writeToDatabase(envImpl,
mappingDbImpl, txn);
rangeTruncated = false;
/*
* Update the tracker to remove the parts that have been written to
* disk. Update firstTrackedVLSN, lastOnDiskVLSN, and clear the bucket
* cache.
*
* In the current implementation, bucketCache is guaranteed not to
* change, so we can just clear the cache. If we had written the
* buckets out without synchronization, the cache might have gained
* mappings, and we wouuld have to be careful to set the
* firstTrackedVLSN to be the first VLSN of the remaining buckets, and
* to set lastTrackedVLSN to be the last VLSN in flushRange.
*
* We would also have to explicitly call SortedMap.remove() to remove
* the written buckets. Do not use TreeMap.tailSet, which would
* inadvertently leave us with the semantics of a SubMap, which has
* mandated key ranges. With rollbacks, we may end up adding buckets
* that are "repeats" in the future.
*/
bucketCache.clear();
firstTrackedVLSN = NULL_VLSN;
}
/**
* Do some sanity checking before the write, to help prevent any corruption.
*/
private void validateBeforeWrite(VLSNBucket target,
VLSNRange flushRange,
VLSN currentLastVLSN,
boolean isLastBucket) {
if (!(currentLastVLSN.equals(NULL_VLSN)) &&
(target.getFirst().compareTo(currentLastVLSN) <= 0)) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "target bucket overlaps previous bucket. " +
"currentLastVLSN= " + currentLastVLSN + " target=" +
target);
}
if (target.getLast().compareTo(flushRange.getLast()) > 0) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "target bucket exceeds flush range. " +
"range= " + flushRange + " target=" +
target);
}
if (isLastBucket) {
if (target.getLast().compareTo(flushRange.getLast()) != 0) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "end of last bucket should match end of range. " +
"range= " + flushRange + " target=" + target);
}
}
}
/**
* Initially the protectedFileRange is set to the file of the range start.
* It is advanced later when the range head is truncated.
*/
void initProtectedFileRange(long firstFile) {
protectedFileRange.advanceRange(firstFile);
}
/**
* @see VLSNIndex#protectRangeHead
*/
synchronized ProtectedFileSet protectRangeHead(String lockerName) {
/*
* Protect all files in the range, including barren files. Because we
* are synchronized, we know that protectedFileRange will not be
* advanced and the range head will remain stable.
*/
return envImpl.getFileProtector().protectFileRange(
lockerName, protectedFileRange.getRangeStart(),
true /*protectVlsnIndex*/, true /*protectBarrenFiles*/);
}
/**
* Returns the file at the lower bound of the current range. This method
* does not synchronize.
*/
long getProtectedRangeStartFile() {
return protectedFileRange.getRangeStart();
}
/**
* Try to advance the VLSNIndex ProtectedFileRange and truncate the head
* of the tracker's range, so that bytesNeeded can be freed by deleting
* files in this range.
*
* @return {deleteEnd, deleteFileNum} pair if the range was changed, or
* null if no change is currently needed/possible.
* -- deleteEnd is the last VLSN to be truncated.
* -- deleteFileNum the file having deleteEnd as its last VLSN.
*/
synchronized Pair<VLSN, Long> tryTruncateFromHead(
final long bytesNeeded,
final LogItemCache logItemCache) {
/* Preserve the last matchpoint and a minimum number of VLSNs. */
final long preserveVlsn = getPreserveVLSN();
if (preserveVlsn < 0) {
return null;
}
/* Determine the vlsn/file after which to truncate the range. */
final Pair<VLSN, Long> truncateInfo =
envImpl.getFileProtector().checkVLSNIndexTruncation(
bytesNeeded, new VLSN(preserveVlsn));
if (truncateInfo == null) {
return null;
}
/*
* It is possible that after checkVLSNIndexTruncation returns, a file
* in the range we expect to delete becomes protected. This is safe,
* because we know that a syncup will not begin while synchronized on
* the VLSNTracker, and feeders always advance their range (never
* retreat). It is also still beneficial to truncate the index because
* we expect the other protection for such a file to be removed soon,
* and then the file can be deleted quickly.
*/
return truncateFromHead(
truncateInfo.first(), truncateInfo.second(), logItemCache) ?
truncateInfo : null;
}
/**
* Like {@link #tryTruncateFromHead(long, LogItemCache)} but allows
* specifying a specific {deleteEnd, deleteFileNum}.
*
* <p>There are several reasons for returning false. TODO: Return more
* info so that {@link DataEraser#getNextFile} can distinguish between
* these cases.</p>
* <ol>
* <li>The files are protected.</li>
* <li>The files are needed to retain at least
* {@link RepParams#MIN_VLSN_INDEX_SIZE} VLSNs.</li>
* <li>The truncation truncation was performed earlier, perhaps by
* another thread.</li>
* </ol>
*/
synchronized boolean tryTruncateFromHead(
final VLSN deleteEnd,
final long deleteFileNum,
final LogItemCache logItemCache) {
/* Preserve the last matchpoint and a minimum number of VLSNs. */
if (deleteEnd.getSequence() >= getPreserveVLSN()) {
return false;
}
/* Determine whether the VLSNIndex for this file is protected. */
if (!envImpl.getFileProtector().checkVLSNIndexTruncation(
deleteFileNum)) {
return false;
}
return truncateFromHead(deleteEnd, deleteFileNum, logItemCache);
}
/**
* Returns the start of the VLSN range that must be preserved. This is
* the minimum of two values:
*
* 1. The lastSync VLSN, which is the last matchpoint and is needed
* to perform syncup.
*
* 2. The last VLSN minus MIN_VLSN_INDEX_SIZE, which is the minimum
* number of VLSNs to preserve for good measure and to prevent the
* VLSNIndex from becoming empty.
*
* Note that a negative value may be returned.
*/
private long getPreserveVLSN() {
return Math.min(
range.getLastSync().getSequence(),
range.getLast().getSequence() -
envImpl.getConfigManager().getInt(
RepParams.MIN_VLSN_INDEX_SIZE));
}
/**
* Truncate the head of the tracker's range, and advance the
* ProtectedFileRange accordingly.
*
* @return true if range is changed, or false if no change is needed.
*/
synchronized boolean truncateFromHead(
VLSN deleteEnd,
long deleteFileNum,
LogItemCache logItemCache) {
/* The caller should pass a non-null VLSN, but check for safety. */
if (deleteEnd.equals(VLSN.NULL_VLSN)) {
return false;
}
/*
* Check the VLSN found in the deleted file against the existing
* mappings. The range should not be empty, and doing the truncation
* should not remove the last sync point. We assume that once this
* environment has received any portion of the replication stream, it
* will maintain a minimum set of VLSNs.
*/
VLSNRange oldRange = range;
if (oldRange.getFirst().compareTo(deleteEnd) > 0) {
/* deleteEnd has already been cast out of the index. */
return false;
}
if (oldRange.isEmpty()) {
throw EnvironmentFailureException.unexpectedState(
envImpl, "Didn't expect current range to be empty. " +
" End of delete range = " + deleteEnd);
}
if (!oldRange.getLastSync().equals(NULL_VLSN) &&
(deleteEnd.compareTo(oldRange.getLastSync()) > 0)) {
throw EnvironmentFailureException.unexpectedState(
envImpl, "Can't log clean away last matchpoint. DeleteEnd= " +
deleteEnd + " lastSync=" + oldRange.getLastSync());
}
/*
* Sanity checks are over and we are committed to changing the range.
*
* Clear the LogItemCache in case it contains entries with VLSNs being
* truncated. This seems extremely unlikely, since the cache is a small
* number of entries at the end of the VLSN range. But perhaps it is
* simplest to clear it in order to guarantee safely, and incur the
* infrequent cost of a few cache misses.
*/
if (logItemCache != null) {
logItemCache.clear();
}
/*
* Advance the ProtectedFileRange to allow files in the truncated
* portion to be deleted. deleteFileNum is the file having deleteEnd as
* its highest VLSN. The VLSN range will now start with deleteEnd+1, so
* now we only need to protect files starting at deleteFileNum+1.
*/
protectedFileRange.advanceRange(deleteFileNum + 1);
/*
* Update the in-memory, cached portion of the index.
*
* Update range first, to prevent others from attempting to read the
* truncated portions. Update the firstTrackedLSN last, after the
* buckets are removed; getLTEBucket relies on the firstTrackedLSN
* value.
*/
range = oldRange.shortenFromHead(deleteEnd);
/*
* Ensure that the range is written to the db even if the resulting
* number of buckets is 0.
*/
rangeTruncated = true;
/*
* afterDelete may not == range.getFirst, becayse range.getFirst may
* be NULL_VLSN if the entire vlsn index has been deleted.
*/
VLSN afterDelete = deleteEnd.getNext();
/*
* Check if the cached buckets needs to be modified. Suppose vlsns 1-8
* are on disk, and vlsns 12->40 are in the tracker cache, with an out
* of order gap from 9 - 11.
*
* case 1: deleteEnd <= 8, Do not need to truncate anything in the
* bucket cacke, but we may have to make a ghost bucket to fill the
* gap.
* case 2: deleteEnd == 11, we don't need to modify the cache or
* create a ghost bucket.
* case 3: deleteEnd >8 and < 11 is in the gap, we need to create a
* ghost bucket to hold the beginning of the range..
* case 4: deleteEnd >= 12, we need to modify the bucket cache.
*/
if (!lastOnDiskVLSN.equals(VLSN.NULL_VLSN) &&
(lastOnDiskVLSN.compareTo(deleteEnd) >= 0)) {
/*
* case 1: the buckets in the tracker cache are unaffected, all
* the pertinent mappings are on disk.
*/
if (lastOnDiskVLSN.equals(deleteEnd)) {
if (firstTrackedVLSN.compareTo(afterDelete) > 0) {
checkForGhostBucket(bucketCache, deleteFileNum);
}
lastOnDiskVLSN = NULL_VLSN;
}
return true;
}
assert(!firstTrackedVLSN.equals(NULL_VLSN));
if (firstTrackedVLSN.equals(afterDelete)) {
/* case 2: The tracker is lined up perfectly with the new range. */
lastOnDiskVLSN = NULL_VLSN;
return true;
}
if (firstTrackedVLSN.compareTo(afterDelete) > 0) {
/*
* case 3: we have to make a ghost bucket.
*/
checkForGhostBucket(bucketCache, deleteFileNum);
lastOnDiskVLSN = NULL_VLSN;
return true;
}
/*
* case 4: we have to prune the buckets.
*/
VLSNBucket owningBucket = getLTEBucket(deleteEnd);
Long retainBucketKey = owningBucket.getFirst().getNext().getSequence();
SortedMap<Long, VLSNBucket> tail;
try {
/*
* We need to chop off part of the bucket cache. Find the portion
* to retain.
*/
tail = bucketCache.tailMap(retainBucketKey);
checkForGhostBucket(tail, deleteFileNum);
bucketCache = tail;
} catch (NoSuchElementException e) {
firstTrackedVLSN = NULL_VLSN;
bucketCache = new TreeMap<>();
}
lastOnDiskVLSN = NULL_VLSN;
return true;
}
private void checkForGhostBucket(SortedMap<Long, VLSNBucket> buckets,
long deleteFileNum) {
/*
* The range has just been truncated. If the first bucket's first vlsn
* != the first in range, insert a GhostBucket.
*/
Long firstKey = buckets.firstKey();
VLSNBucket firstBucket = buckets.get(firstKey);
VLSN firstRangeVLSN = range.getFirst();
VLSN firstBucketVLSN = firstBucket.getFirst();
if (!firstBucketVLSN.equals(firstRangeVLSN)) {
long nextFile =
envImpl.getFileManager().getFollowingFileNum(deleteFileNum,
true /* forward */);
long lastPossibleLsn = firstBucket.getLsn(firstBucketVLSN);
VLSNBucket placeholder = new GhostBucket(firstRangeVLSN,
DbLsn.makeLsn(nextFile, 0),
lastPossibleLsn);
buckets.put(firstRangeVLSN.getSequence(), placeholder);
}
}
/**
* Remove the mappings for VLSNs >= deleteStart. This should only be used
* at syncup, or recovery. We can assume no new mappings are coming in, but
* the VLSNIndex may be read during this time; checkpoints continue to
* happen during rollbacks. Since syncup is always at a sync-able log
* entry, and since we don't roll back past a commit, we can assume that
* the lastSync can be changed to deleteStart.getPrev(), and lastTxnEnd is
* unchanged. The VLSNRange itself is updated before the tracker and
* on-disk buckets are pruned.
*
* The VLSNIndex guarantees that the beginning and end vlsns in the range
* have mappings so LTE and GTE search operations work. When truncating the
* mappings in the tracker, check to see if the new end vlsn has a mapping,
* since that may not happen if the buckets are cut in the middle, or have
* a gap due to out of order mapping. For example, suppose mapping for vlsn
* 20 came before the mappings for vlsn 17, 18 and 19. The buckets would
* have this gap:
* Bucket A: vlsns 10-16
* Bucket B: vlsns 20-25
* Bucket C: vlsns 26 - 30
* If the new end of vlsn range is 11-14, 17, 18, 19, 21 - 24, or 27-29,
* we have to end the vlsn tracker with a new mapping of
* (vlsn (deleteStart-1) -> prevLsn) to cap off the buckets.
*
* Cases:
* End of range is in the gap -- either 17, 18, 19
* 1. on disk buckets exist, but don't encompass the gap: bucket A on disk
* 2. all buckets are in the tracker cache: bucket A in tracker
* 3. on disk buckets encompass the gap: bucket A and B on disk.
*/
synchronized void truncateFromTail(VLSN deleteStart, long prevLsn) {
/*
* Update the range first, it will define the range that the vlsnIndex
* covers. Then adjust any mappings held in the bucket cache. Don't
* update the lastOnDiskVLSN marker, which says what is on disk. Since
* the on-disk portion may also get truncated, the caller will update
* the lastOnDiskVLSN field when it inspects the database.
*/
VLSNRange oldRange = range;
range = oldRange.shortenFromEnd(deleteStart);
/*
* Ensure that the range is written to the db even if the resulting
* number of buckets is 0.
*/
rangeTruncated = true;
if (firstTrackedVLSN.equals(NULL_VLSN)) {
/* No mappings in this tracker */
return;
}
if (firstTrackedVLSN.compareTo(deleteStart) >= 0) {
/*
* Everything in this tracker should be removed. In addition, the
* caller may be removing some items from the database.
*/
bucketCache.clear();
firstTrackedVLSN = NULL_VLSN;
return;
}
/*
* We need to do some pruning of the bucket in the cache. Find the
* buckets in the bucketCache that have mappings >= deleteStart, and
* remove their mappings. First establish the headset of buckets that
* should be preserved.
*/
VLSNBucket targetBucket = getGTEBucket(deleteStart);
Long targetKey = targetBucket.getFirst().getSequence();
/* The newCache has buckets that should be preserved. */
SortedMap<Long, VLSNBucket> newCache =
new TreeMap<>(bucketCache.headMap(targetKey));
/*
* Prune any mappings >= deleteStart out of targetBucket. If it has any
* mappings left, add it to newCache.
*/
targetBucket.removeFromTail(deleteStart, prevLsn);
if (!targetBucket.empty()) {
newCache.put(targetBucket.getFirst().getSequence(),
targetBucket);
}
bucketCache = newCache;
/*
* Now all truncated mappings have been removed from the index. Check
* that the end point of the vlsn range is represented in this
* tracker. Since vlsn mappings can come out of order, it's
* possible that the buckets have a gap, and that truncation will
* cleave the bucket cache just at a point where a mapping is
* missing. For example, vlsn mappings come in this order:
* vlsn 16, vlsn 18, vlsn 17
* causing the buckets to have this gap:
* bucket N: vlsn 10 - 16
* bucket N+1: vlsn 18 - 20
* and that the vlsn range is truncated with a deleteStart of vlsn 18.
* The new end range becomes 17, but the buckets do not have a
* mapping for vlsn 17. If so, create a mapping now.
*
* If we are at this point, we know that the tracker should contain
* a mapping for the last VLSN in the range. Fix it now, so the
* datastructure is as tidy as possible.
*/
boolean needEndMapping;
if (bucketCache.isEmpty()) {
needEndMapping = true;
} else {
VLSNBucket lastBucket = bucketCache.get(bucketCache.lastKey());
needEndMapping =
(lastBucket.getLast().compareTo(range.getLast()) < 0);
}
if (needEndMapping) {
addEndMapping(range.getLast(), prevLsn);
}
}
/**
* Called by the VLSNIndex to see if we have to add a bucket in the tracker
* after pruning the on-disk storage. The get{LTE,GTE}Bucket methods
* assume that there are mappings for the first and last VLSN in the range.
* But since out of order vlsn mappings can cause non-contiguous buckets,
* the pruning may have lost the end mapping. [#23491]
*/
synchronized void ensureRangeEndIsMapped(VLSN lastVLSN, long lastLsn) {
/*
* if lastOnDiskVLSN < lastVLSN < firstTrackedVLSN or
* lastOnDiskVLSN < lastVLSN and firstTrackedVLSN is null
* then we need to add a mapping for lastVLSN->lastLsn. Otherwise a
* mapping already exists.
*/
if (lastOnDiskVLSN.compareTo(lastVLSN) < 0) {
/*
* The on-disk vlsn mappings aren't sufficient to cover the
* lastVLSN. There needs to be a mapping in the tracker for the
* end point of the range.
*/
if (!firstTrackedVLSN.equals(NULL_VLSN)) {
/*
* There are mappings in the tracker, so we can assume
* that they cover the end point, because truncateFromTail()
* should have adjusted for that. But assert to be sure!
*/
if (lastVLSN.compareTo(firstTrackedVLSN) < 0) {
throw EnvironmentFailureException.unexpectedState
(envImpl,
"Expected this tracker to cover vlsn " + lastVLSN +
" after truncateFromTail. LastOnDiskVLSN= " +
lastOnDiskVLSN + " tracker=" + this);
}
return;
}
/*
* There are no mappings in the tracker. The on disk mappings were
* pruned, and a gap was created between what was on disk and what
* is in the tracker cache. Add a mapping.
*/
addEndMapping(lastVLSN, lastLsn);
}
}
/**
* Add a bucket holding a lastVLSN -> lastLsn mapping
*/
private void addEndMapping(VLSN lastVLSN, long lastLsn) {
/* Assert that it's in the range */
assert (lastVLSN.compareTo(range.getLast()) == 0) :
"lastVLSN=" + lastVLSN + " lastLsn = " + lastLsn +
" range=" + range;
VLSNBucket addBucket =
new VLSNBucket(DbLsn.getFileNumber(lastLsn), stride,
maxMappings, maxDistance, lastVLSN);
addBucket.put(lastVLSN, lastLsn);
nBucketsCreated.increment();
bucketCache.put(addBucket.getFirst().getSequence(), addBucket);
if (firstTrackedVLSN.equals(NULL_VLSN)) {
firstTrackedVLSN = lastVLSN;
}
}
/**
* Attempt to replace the mappings in this vlsnIndex for
* deleteStart->lastVLSN with those from the recovery mapper.
* Since we do not supply a prevLsn argument, we may not be able to "cap"
* the truncated vlsn bucket the same what that we can in
* truncateFromTail. Because of that, we may need to remove mappings that
* are < deleteStart.
*
* For example, suppose a bucket holds mappings
* 10 -> 101
* 15 -> 201
* 18 -> 301
* If deleteStart == 17, we will have to delete all the way to vlsn 15.
*
* The maintenance of VLSNRange.lastSync and lastTxnEnd are simplified
* because of assumptions we can make because we are about to append
* mappings found by the recovery tracker that begin at deleteStart. If
* lastSync and lastTxnEnd are <= deleteStart, we know that they will
* either stay the same, or be replaced by lastSync and lastTxnEnd from the
* recoveryTracker. Even we delete mappings that are < deleteStart, we know
* that we did not roll back past an abort or commit, so that we do not
* have to updated the lastSync or lastTxnEnd value.
*/
void merge(VLSN prunedLastOnDiskVLSN, VLSNRecoveryTracker recoveryTracker) {
VLSNRange oldRange = range;
range = oldRange.merge(recoveryTracker.range);
VLSN recoveryFirst = recoveryTracker.getRange().getFirst();
lastOnDiskVLSN = prunedLastOnDiskVLSN;
/*
* Find the buckets in the bucketCache that have mappings >=
* recoveryFirst, and remove their mappings. First establish the
* headset of buckets that should be preserved. At this point, we
* have already pruned the database, so the bucket set may not
* be fully contiguous -- we may have pruned away mappings that
* would normally be in the database.
*/
if (bucketCache.size() == 0) {
/* Just take all the recovery tracker's mappings */
bucketCache = recoveryTracker.bucketCache;
} else {
VLSNBucket targetBucket = getGTEBucket(recoveryFirst);
Long targetKey = targetBucket.getFirst().getSequence();
/* The newCache holds buckets that should be preserved. */
SortedMap<Long, VLSNBucket> newCache =
new TreeMap<>(bucketCache.headMap(targetKey));
/*
* Prune any mappings >= recoveryFirst out of targetBucket. If it
* has any mappings left, add it to newCache.
*/
targetBucket.removeFromTail(recoveryFirst, DbLsn.NULL_LSN);
if (!targetBucket.empty()) {
newCache.put(targetBucket.getFirst().getSequence(),
targetBucket);
}
newCache.putAll(recoveryTracker.bucketCache);
bucketCache = newCache;
}
if (bucketCache.size() > 0) {
VLSNBucket firstBucket = bucketCache.get(bucketCache.firstKey());
firstTrackedVLSN = firstBucket.getFirst();
}
}
void append(VLSNRecoveryTracker recoveryTracker) {
/*
* This method assume that there is no overlap between this tracker
* and the recovery tracker. Everything in this tracker should precede
* the recovery tracker.
*/
if (!range.getLast().isNull()) {
if (range.getLast().compareTo
(recoveryTracker.getFirstTracked()) >= 0) {
throw EnvironmentFailureException.unexpectedState
(envImpl,
"Expected this tracker to precede recovery tracker. " +
"This tracker= " + this + " recoveryTracker = " +
recoveryTracker);
}
}
bucketCache.putAll(recoveryTracker.bucketCache);
VLSNRange currentRange = range;
range = currentRange.getUpdate(recoveryTracker.getRange());
if (bucketCache.size() > 0) {
VLSNBucket firstBucket = bucketCache.get(bucketCache.firstKey());
firstTrackedVLSN = firstBucket.getFirst();
}
}
VLSNRange getRange() {
return range;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(range);
sb.append(" firstTracked=").append(firstTrackedVLSN);
sb.append(" lastOnDiskVLSN=").append(lastOnDiskVLSN);
for (VLSNBucket bucket: bucketCache.values()) {
sb.append("\n");
sb.append(bucket);
}
return sb.toString();
}
/**
* For unit test support.
*/
synchronized boolean verify(boolean verbose) {
if (!range.verify(verbose)) {
return false;
}
/* Check that all the buckets are in order. */
ArrayList<VLSN> firstVLSN = new ArrayList<>();
ArrayList<VLSN> lastVLSN = new ArrayList<>();
for (VLSNBucket b : bucketCache.values()) {
firstVLSN.add(b.getFirst());
lastVLSN.add(b.getLast());
}
if (!verifyBucketBoundaries(firstVLSN, lastVLSN)) {
return false;
}
if (firstVLSN.size() > 0) {
if (!firstVLSN.get(0).equals(firstTrackedVLSN)) {
if (verbose) {
System.err.println("firstBucketVLSN = " +
firstVLSN.get(0) + " should equal " +
firstTrackedVLSN);
}
return false;
}
VLSN lastBucketVLSN = lastVLSN.get(lastVLSN.size() -1);
if (!lastBucketVLSN.equals(range.getLast())) {
if (verbose) {
System.err.println("lastBucketVLSN = " + lastBucketVLSN +
" should equal " + range.getLast());
}
return false;
}
}
return true;
}
/*
* Used to check bucket boundaries for both the in-memory flush list and
* the on-disk database buckets. Buckets may not be adjacent, but must be
* in order.
*/
static boolean verifyBucketBoundaries(ArrayList<VLSN> firstVLSN,
ArrayList<VLSN> lastVLSN) {
for (int i = 1; i < firstVLSN.size(); i++) {
VLSN first = firstVLSN.get(i);
VLSN prevLast = lastVLSN.get(i-1);
if (first.compareTo(prevLast.getNext()) < 0) {
System.out.println("Boundary problem: bucket " + i +
" first " + first +
" follows bucket.last " + prevLast);
return false;
}
}
return true;
}
VLSN getFirstTracked() {
return firstTrackedVLSN;
}
VLSN getLastOnDisk() {
return lastOnDiskVLSN;
}
void setLastOnDiskVLSN(VLSN lastOnDisk) {
lastOnDiskVLSN = lastOnDisk;
}
/*
* For unit test support only. Can only be called when replication stream
* is quiescent.
*/
boolean isFlushedToDisk() {
return lastOnDiskVLSN.equals(range.getLast());
}
void close() {
if (protectedFileRange != null) {
final FileProtector fileProtector = envImpl.getFileProtector();
fileProtector.removeFileProtection(protectedFileRange);
fileProtector.setVLSNIndexProtectedFileRange(null);
}
}
}