| /*- |
| * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved. |
| * |
| * This file was distributed by Oracle as part of a version of Oracle Berkeley |
| * DB Java Edition made available at: |
| * |
| * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html |
| * |
| * Please see the LICENSE file included in the top-level directory of the |
| * appropriate version of Oracle Berkeley DB Java Edition for a copy of the |
| * license and additional information. |
| */ |
| |
| package com.sleepycat.je.log; |
| |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_READ_FROM_WRITEQUEUE; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FILE_OPENS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FSYNC_95_MS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FSYNC_99_MS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FSYNC_AVG_MS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_FSYNC_MAX_MS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_LOG_FSYNCS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_OPEN_FILES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_READ_BYTES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_RANDOM_WRITE_BYTES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_READS_FROM_WRITEQUEUE; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READS; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_READ_BYTES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_SEQUENTIAL_WRITE_BYTES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES; |
| import static com.sleepycat.je.log.LogStatDefinition.FILEMGR_WRITES_FROM_WRITEQUEUE; |
| |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.FilenameFilter; |
| import java.io.IOException; |
| import java.io.RandomAccessFile; |
| import java.nio.ByteBuffer; |
| import java.nio.channels.ClosedChannelException; |
| import java.nio.channels.FileChannel; |
| import java.nio.channels.FileLock; |
| import java.nio.channels.OverlappingFileLockException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashMap; |
| import java.util.Hashtable; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Random; |
| import java.util.Set; |
| import java.util.concurrent.locks.ReentrantLock; |
| |
| import com.sleepycat.je.DatabaseException; |
| import com.sleepycat.je.EnvironmentFailureException; |
| import com.sleepycat.je.EnvironmentLockedException; |
| import com.sleepycat.je.LogWriteException; |
| import com.sleepycat.je.StatsConfig; |
| import com.sleepycat.je.ThreadInterruptedException; |
| import com.sleepycat.je.config.EnvironmentParams; |
| import com.sleepycat.je.dbi.DbConfigManager; |
| import com.sleepycat.je.dbi.EnvironmentFailureReason; |
| import com.sleepycat.je.dbi.EnvironmentImpl; |
| import com.sleepycat.je.log.entry.FileHeaderEntry; |
| import com.sleepycat.je.log.entry.LogEntry; |
| import com.sleepycat.je.utilint.DbLsn; |
| import com.sleepycat.je.utilint.HexFormatter; |
| import com.sleepycat.je.utilint.IntStat; |
| import com.sleepycat.je.utilint.LatencyPercentileStat; |
| import com.sleepycat.je.utilint.LoggerUtils; |
| import com.sleepycat.je.utilint.LongAvgStat; |
| import com.sleepycat.je.utilint.LongMaxZeroStat; |
| import com.sleepycat.je.utilint.LongStat; |
| import com.sleepycat.je.utilint.RelatchRequiredException; |
| import com.sleepycat.je.utilint.StatGroup; |
| import com.sleepycat.je.utilint.Timestamp; |
| |
| /** |
| * The FileManager presents the abstraction of one contiguous file. It doles |
| * out LSNs. |
| */ |
| public class FileManager { |
| |
/**
 * The modes in which a JE log file may be opened, mapping each mode to
 * the string accepted by {@link java.io.RandomAccessFile}'s constructor.
 */
public enum FileMode {
    READ_MODE("r", false),
    READWRITE_MODE("rw", true),
    READWRITE_ODSYNC_MODE("rwd", true),
    READWRITE_OSYNC_MODE("rws", true);

    /* RandomAccessFile mode string; final since enum constants are immutable. */
    private final String fileModeValue;

    /* True if files opened in this mode may be written. */
    private final boolean isWritable;

    private FileMode(String fileModeValue, boolean isWritable) {
        this.fileModeValue = fileModeValue;
        this.isWritable = isWritable;
    }

    /** Returns the mode string to pass to RandomAccessFile, e.g. "rw". */
    public String getModeValue() {
        return fileModeValue;
    }

    /** Returns true if files opened in this mode may be written. */
    public boolean isWritable() {
        return isWritable;
    }
}
| |
/* Enables extra debugging output; compile-time constant. */
private static final boolean DEBUG = false;

/*
 * The number of writes that have been performed.
 *
 * public so that unit tests can diddle them.
 */
public static long WRITE_COUNT = 0;

/*
 * The write count value where we should stop or throw.
 */
public static long STOP_ON_WRITE_COUNT = Long.MAX_VALUE;

/*
 * If we're throwing, then throw on write #'s WRITE_COUNT through
 * WRITE_COUNT + N_BAD_WRITES - 1 (inclusive).
 */
public static long N_BAD_WRITES = Long.MAX_VALUE;

/*
 * If true, then throw an IOException on write #'s WRITE_COUNT through
 * WRITE_COUNT + N_BAD_WRITES - 1 (inclusive).
 */
public static boolean THROW_ON_WRITE = false;

public static final String JE_SUFFIX = ".jdb";  // regular log files
public static final String DEL_SUFFIX = ".del";  // cleaned files
public static final String BAD_SUFFIX = ".bad";  // corrupt files
private static final String LOCK_FILE = "je.lck";// lock file
static final String[] DEL_SUFFIXES = { DEL_SUFFIX };
static final String[] JE_SUFFIXES = { JE_SUFFIX };
private static final String[] JE_AND_DEL_SUFFIXES =
    { JE_SUFFIX, DEL_SUFFIX };

/*
 * The suffix used to denote a file that is in the process of being
 * transferred during a network backup. The file may not have been
 * completely transferred, or its digest verified.
 */
public static final String TMP_SUFFIX = ".tmp";

/*
 * The suffix used to rename files out of the way, if they are being
 * retained during a backup. Note that the suffix is used in conjunction
 * with a backup number as described in <code>NetworkBackup</code>
 */
public static final String BUP_SUFFIX = ".bup";

/* May be set to false to speed unit tests. */
private boolean syncAtFileEnd = true;

private final EnvironmentImpl envImpl;
/* Maximum size of a single log file; from LOG_FILE_MAX. */
private final long maxFileSize;
private final File dbEnvHome;
/* Non-null only when multiple data directories (dataNNN) are in use. */
private final File[] dbEnvDataDirs;

/* True if .del files should be included in the list of log files. */
private boolean includeDeletedFiles = false;

/* File cache */
private final FileCache fileCache;

private FileCacheWarmer fileCacheWarmer;

/* The channel and lock for the je.lck file. */
private RandomAccessFile lockFile;
private FileChannel channel;
/* Shared (read-only env) or exclusive (writer env) lock on je.lck. */
private FileLock envLock;
private FileLock exclLock;

/* True if all files should be opened readonly. */
private final boolean readOnly;

/* Handles onto log position */
private volatile long currentFileNum; // number of the current file
private volatile long nextAvailableLsn; // the next LSN available
private volatile long lastUsedLsn; // last LSN used in the current file
private boolean forceNewFile; // Force new file on next write

/* endOfLog is used for writes and fsyncs to the end of the log. */
private final LogEndFileDescriptor endOfLog;

/*
 * When we bump the LSNs over to a new file, we must remember the last LSN
 * of the previous file so we can set the prevOffset field of the file
 * header appropriately. We have to save it in a map because there's a time
 * lag between when we know what the last LSN is and when we actually do
 * the file write, because LSN bumping is done before we get a write
 * buffer. This map is keyed by file num->last LSN.
 */
private final Map<Long, Long> perFileLastUsedLsn;

/*
 * True if we should use the Write Queue. This queue is enabled by default
 * and contains any write() operations which were attempted but would have
 * blocked because an fsync() or another write() was in progress at the
 * time. The operations on the Write Queue are later executed by the next
 * operation that is able to grab the fsync latch. File systems like ext3
 * need this queue in order to achieve reasonable throughput since it
 * acquires an exclusive mutex on the inode during any IO operation
 * (seek/read/write/fsync). OS's like Windows and Solaris do not since
 * they are able to handle concurrent IO operations on a single file.
 */
private final boolean useWriteQueue;

/* The starting size of the Write Queue. */
private final int writeQueueSize;

/*
 * Use O_DSYNC to open JE log files.
 */
private final boolean useODSYNC;

/* public for unit tests. */
public boolean VERIFY_CHECKSUMS = false;

/** {@link EnvironmentParams#LOG_FSYNC_TIME_LIMIT}. */
private final int fSyncTimeLimit;

/*
 * Non-0 means to use envHome/data001 through envHome/data00N for the
 * environment directories, where N is nDataDirs. Distribute *.jdb files
 * through dataNNN directories round-robin.
 */
private final int nDataDirs;

/*
 * Last file to which any IO was done.
 */
long lastFileNumberTouched = -1;

/*
 * Current file offset of lastFile.
 */
long lastFileTouchedOffset = 0;

/*
 * For IO stats, this is a measure of what is "close enough" to constitute
 * a sequential IO vs a random IO. 1MB for now. Generally a seek within a
 * few tracks of the current disk track is "fast" and only requires a
 * single rotational latency.
 */
private static final long ADJACENT_TRACK_SEEK_DELTA = 1 << 20;

/*
 * Used to detect unexpected file deletion.
 */
private final FileDeletionDetector fdd;

/**
 * Used to prevent three operations from executing concurrently: file
 * deletion, file renaming, and opening a file for read-write access.
 *
 * When opening a file for read-write access, we never want to create the
 * file (when it doesn't previously exist). There is no file open mode in
 * Java to do this. This mutex is used to make the check for file existence
 * and the opening of the file into an atomic operation, such that the
 * file cannot be deleted or renamed in between.
 *
 * The mutex is large grained (it is global) but this is not considered a
 * problem because all three operations it protects are infrequent and
 * they are not in the CRUD operation path.
 */
private final Object fileUpdateMutex = new Object();

/*
 * Stats. All are registered with the FILEMGR stat group created in the
 * constructor; random vs sequential is decided by
 * ADJACENT_TRACK_SEEK_DELTA.
 */
final StatGroup stats;
final LongStat nRandomReads;
final LongStat nRandomWrites;
final LongStat nSequentialReads;
final LongStat nSequentialWrites;
final LongStat nRandomReadBytes;
final LongStat nRandomWriteBytes;
final LongStat nSequentialReadBytes;
final LongStat nSequentialWriteBytes;
final IntStat nFileOpens;
final IntStat nOpenFiles;
final LongStat nBytesReadFromWriteQueue;
final LongStat nBytesWrittenFromWriteQueue;
final LongStat nReadsFromWriteQueue;
final LongStat nWritesFromWriteQueue;
final LongStat nWriteQueueOverflow;
final LongStat nWriteQueueOverflowFailures;
/* all fsyncs, includes those issued for group commit */
final LongStat nLogFSyncs;
/* fsync latency aggregates: average, 95th/99th percentile, and max. */
final LongAvgStat fSyncAvgMs;
final LatencyPercentileStat fSync95Ms;
final LatencyPercentileStat fSync99Ms;
final LongMaxZeroStat fSyncMaxMs;
| |
| /** |
| * Set up the file cache and initialize the file manager to point to the |
| * beginning of the log. |
| * |
| * @param dbEnvHome environment home directory |
| * |
| * @throws IllegalArgumentException via Environment ctor |
| * |
| * @throws EnvironmentLockedException via Environment ctor |
| */ |
| public FileManager(EnvironmentImpl envImpl, |
| File dbEnvHome, |
| boolean readOnly) |
| throws EnvironmentLockedException { |
| |
| this.envImpl = envImpl; |
| this.dbEnvHome = dbEnvHome; |
| this.readOnly = readOnly; |
| |
| boolean success = false; |
| |
| stats = new StatGroup(LogStatDefinition.FILEMGR_GROUP_NAME, |
| LogStatDefinition.FILEMGR_GROUP_DESC); |
| nRandomReads = new LongStat(stats, FILEMGR_RANDOM_READS); |
| nRandomWrites = new LongStat(stats, FILEMGR_RANDOM_WRITES); |
| nSequentialReads = new LongStat(stats, FILEMGR_SEQUENTIAL_READS); |
| nSequentialWrites = new LongStat(stats, FILEMGR_SEQUENTIAL_WRITES); |
| nRandomReadBytes = new LongStat(stats, FILEMGR_RANDOM_READ_BYTES); |
| nRandomWriteBytes = new LongStat(stats, FILEMGR_RANDOM_WRITE_BYTES); |
| nSequentialReadBytes = |
| new LongStat(stats, FILEMGR_SEQUENTIAL_READ_BYTES); |
| nSequentialWriteBytes = |
| new LongStat(stats, FILEMGR_SEQUENTIAL_WRITE_BYTES); |
| nFileOpens = new IntStat(stats, FILEMGR_FILE_OPENS); |
| nOpenFiles = new IntStat(stats, FILEMGR_OPEN_FILES); |
| nBytesReadFromWriteQueue = |
| new LongStat(stats, FILEMGR_BYTES_READ_FROM_WRITEQUEUE); |
| nBytesWrittenFromWriteQueue = |
| new LongStat(stats, FILEMGR_BYTES_WRITTEN_FROM_WRITEQUEUE); |
| nReadsFromWriteQueue = |
| new LongStat(stats, FILEMGR_READS_FROM_WRITEQUEUE); |
| nWritesFromWriteQueue = |
| new LongStat(stats, FILEMGR_WRITES_FROM_WRITEQUEUE); |
| nWriteQueueOverflow = new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW); |
| nWriteQueueOverflowFailures = |
| new LongStat(stats, FILEMGR_WRITEQUEUE_OVERFLOW_FAILURES); |
| nLogFSyncs = new LongStat(stats, FILEMGR_LOG_FSYNCS); |
| fSyncAvgMs = new LongAvgStat(stats, FILEMGR_FSYNC_AVG_MS); |
| fSync95Ms = |
| new LatencyPercentileStat(stats, FILEMGR_FSYNC_95_MS, 0.95f); |
| fSync99Ms = |
| new LatencyPercentileStat(stats, FILEMGR_FSYNC_99_MS, 0.99f); |
| fSyncMaxMs = new LongMaxZeroStat(stats, FILEMGR_FSYNC_MAX_MS); |
| |
| try { |
| /* Read configurations. */ |
| DbConfigManager configManager = envImpl.getConfigManager(); |
| maxFileSize = |
| configManager.getLong(EnvironmentParams.LOG_FILE_MAX); |
| |
| useWriteQueue = configManager.getBoolean( |
| EnvironmentParams.LOG_USE_WRITE_QUEUE); |
| |
| writeQueueSize = configManager.getInt( |
| EnvironmentParams.LOG_WRITE_QUEUE_SIZE); |
| |
| useODSYNC = configManager.getBoolean( |
| EnvironmentParams.LOG_USE_ODSYNC); |
| |
| VERIFY_CHECKSUMS = configManager.getBoolean( |
| EnvironmentParams.LOG_VERIFY_CHECKSUMS); |
| |
| fSyncTimeLimit = configManager.getDuration( |
| EnvironmentParams.LOG_FSYNC_TIME_LIMIT); |
| |
| nDataDirs = configManager.getInt( |
| EnvironmentParams.LOG_N_DATA_DIRECTORIES); |
| |
| if (nDataDirs != 0) { |
| dbEnvDataDirs = gatherDataDirs(); |
| } else { |
| checkNoDataDirs(); |
| dbEnvDataDirs = null; |
| } |
| |
| if (!envImpl.isMemOnly()) { |
| if (!dbEnvHome.exists()) { |
| throw new IllegalArgumentException |
| ("Environment home " + dbEnvHome + " doesn't exist"); |
| } |
| |
| /* |
| * If this is an arbiter take an exclusive lock. |
| */ |
| boolean isReadOnly = envImpl.isArbiter() ? false : readOnly; |
| if (!lockEnvironment(isReadOnly, false)) { |
| throw new EnvironmentLockedException |
| (envImpl, |
| "The environment cannot be locked for " + |
| (isReadOnly ? "shared" : "single writer") + |
| " access."); |
| } |
| } |
| |
| /* Cache of files. */ |
| fileCache = new FileCache(configManager); |
| |
| /* Start out as if no log existed. */ |
| currentFileNum = 0L; |
| nextAvailableLsn = |
| DbLsn.makeLsn(currentFileNum, firstLogEntryOffset()); |
| lastUsedLsn = DbLsn.NULL_LSN; |
| perFileLastUsedLsn = |
| Collections.synchronizedMap(new HashMap<Long, Long>()); |
| endOfLog = new LogEndFileDescriptor(); |
| forceNewFile = false; |
| |
| final String stopOnWriteCountName = "je.debug.stopOnWriteCount"; |
| final String stopOnWriteCountProp = |
| System.getProperty(stopOnWriteCountName); |
| if (stopOnWriteCountProp != null) { |
| try { |
| STOP_ON_WRITE_COUNT = Long.parseLong(stopOnWriteCountProp); |
| } catch (NumberFormatException e) { |
| throw new IllegalArgumentException |
| ("Could not parse: " + stopOnWriteCountName, e); |
| } |
| } |
| |
| final String stopOnWriteActionName = "je.debug.stopOnWriteAction"; |
| final String stopOnWriteActionProp = |
| System.getProperty(stopOnWriteActionName); |
| if (stopOnWriteActionProp != null) { |
| if (stopOnWriteActionProp.compareToIgnoreCase("throw") == 0) { |
| THROW_ON_WRITE = true; |
| } else if (stopOnWriteActionProp. |
| compareToIgnoreCase("stop") == 0) { |
| THROW_ON_WRITE = false; |
| } else { |
| throw new IllegalArgumentException |
| ("Unknown value for: " + stopOnWriteActionName + |
| stopOnWriteActionProp); |
| } |
| } |
| |
| final Boolean logFileDeleteDetect = configManager.getBoolean( |
| EnvironmentParams.LOG_DETECT_FILE_DELETE); |
| if (!envImpl.isMemOnly() && logFileDeleteDetect) { |
| fdd = new FileDeletionDetector( |
| dbEnvHome, dbEnvDataDirs, envImpl); |
| } else { |
| fdd = null; |
| } |
| |
| success = true; |
| } finally { |
| if (!success) { |
| try { |
| close(); |
| } catch (IOException e) { |
| /* |
| * Klockwork - ok |
| * Eat it, we want to throw the original exception. |
| */ |
| } |
| } |
| } |
| } |
| |
| /** |
| * Set the file manager's "end of log". |
| * |
| * @param nextAvailableLsn LSN to be used for the next log entry |
| * @param lastUsedLsn last LSN to have a valid entry, may be null |
| * @param prevOffset value to use for the prevOffset of the next entry. |
| * If the beginning of the file, this is 0. |
| */ |
| public void setLastPosition(long nextAvailableLsn, |
| long lastUsedLsn, |
| long prevOffset) { |
| this.lastUsedLsn = lastUsedLsn; |
| perFileLastUsedLsn.put(DbLsn.getFileNumber(lastUsedLsn), lastUsedLsn); |
| this.nextAvailableLsn = nextAvailableLsn; |
| currentFileNum = DbLsn.getFileNumber(this.nextAvailableLsn); |
| } |
| |
| /** |
| * May be used to disable sync at file end to speed unit tests. |
| * Must only be used for unit testing, since log corruption may result. |
| */ |
| public void setSyncAtFileEnd(boolean sync) { |
| syncAtFileEnd = sync; |
| } |
| |
| /* |
| * File management |
| */ |
| |
| /** |
| * public for cleaner. |
| * |
| * @return the number of the first file in this environment. |
| */ |
| public Long getFirstFileNum() { |
| return getFileNum(true); |
| } |
| |
| public boolean getReadOnly() { |
| return readOnly; |
| } |
| |
| /** |
| * @return the number of the last file in this environment. |
| */ |
| public Long getLastFileNum() { |
| return getFileNum(false); |
| } |
| |
| /** |
| * Returns the highest (current) file number. Note that this is |
| * unsynchronized, so if it is called outside the log write latch it is |
| * only valid as an approximation. |
| */ |
| public long getCurrentFileNum() { |
| return currentFileNum; |
| } |
| |
| /** |
| * For unit tests. |
| */ |
| boolean getUseWriteQueue() { |
| return useWriteQueue; |
| } |
| |
| /** |
| * For assertions that check whether a file is valid or has been deleted |
| * via log cleaning. |
| */ |
| public boolean isFileValid(long fileNum) { |
| |
| /* |
| * If the file is the current file, it may be buffered and not yet |
| * created. If the env is memory-only, we will never create or delete |
| * log files. |
| */ |
| if (fileNum == currentFileNum || envImpl.isMemOnly()) { |
| return true; |
| } |
| |
| /* Check for file existence. */ |
| String fileName = getFullFileName(fileNum, FileManager.JE_SUFFIX); |
| File file = new File(fileName); |
| return file.exists(); |
| } |
| |
| public void setIncludeDeletedFiles(boolean includeDeletedFiles) { |
| this.includeDeletedFiles = includeDeletedFiles; |
| } |
| |
| /** |
| * Get all JE file numbers. |
| * @return an array of all JE file numbers. |
| */ |
| public Long[] getAllFileNumbers() { |
| /* Get all the names in sorted order. */ |
| String[] names = listFileNames(JE_SUFFIXES); |
| Long[] nums = new Long[names.length]; |
| for (int i = 0; i < nums.length; i += 1) { |
| String name = names[i]; |
| long num = nums[i] = getNumFromName(name); |
| if (nDataDirs != 0) { |
| int dbEnvDataDirsIdx = getDataDirIndexFromName(name) - 1; |
| if (dbEnvDataDirsIdx != (num % nDataDirs)) { |
| throw EnvironmentFailureException.unexpectedState |
| ("Found file " + name + " but it should have been in " + |
| "data directory " + (dbEnvDataDirsIdx + 1) + |
| ". Perhaps it was moved or restored incorrectly?"); |
| } |
| } |
| } |
| return nums; |
| } |
| |
| /** |
| * Get the next file number before/after currentFileNum. |
| * @param curFile the file we're at right now. Note that |
| * it may not exist, if it's been cleaned and renamed. |
| * @param forward if true, we want the next larger file, if false |
| * we want the previous file |
| * @return null if there is no following file, or if filenum doesn't exist |
| */ |
| public Long getFollowingFileNum(long curFile, boolean forward) { |
| |
| /* |
| * First try the next/prev file number without listing all files. This |
| * efficiently supports an important use case: reading files during |
| * recovery, where there are no gaps due to log cleaning. If there is a |
| * gap due to log cleaning, fall through and get a list of all files. |
| */ |
| final long tryFile; |
| if (forward) { |
| if (curFile == Long.MAX_VALUE) { |
| return null; |
| } |
| tryFile = curFile + 1; |
| } else { |
| if (curFile <= 0) { |
| return null; |
| } |
| tryFile = curFile - 1; |
| } |
| |
| String tryName = getFullFileName(tryFile, JE_SUFFIX); |
| if ((new File(tryName)).isFile()) { |
| return tryFile; |
| } |
| |
| /* Get all the names in sorted order. */ |
| String[] names = listFileNames(JE_SUFFIXES); |
| |
| /* Search for the current file. */ |
| String searchName = getFileName(curFile, JE_SUFFIX); |
| int foundIdx = Arrays.binarySearch(names, searchName, stringComparator); |
| |
| boolean foundTarget = false; |
| if (foundIdx >= 0) { |
| if (forward) { |
| foundIdx++; |
| } else { |
| foundIdx--; |
| } |
| } else { |
| |
| /* |
| * currentFileNum not found (might have been cleaned). FoundIdx |
| * will be (-insertionPoint - 1). |
| */ |
| foundIdx = Math.abs(foundIdx + 1); |
| if (!forward) { |
| foundIdx--; |
| } |
| } |
| |
| /* The current fileNum is found, return the next or prev file. */ |
| if (forward && (foundIdx < names.length)) { |
| foundTarget = true; |
| } else if (!forward && (foundIdx > -1)) { |
| foundTarget = true; |
| } |
| |
| if (foundTarget) { |
| return getNumFromName(names[foundIdx]); |
| } |
| return null; |
| } |
| |
| /** |
| * @return true if there are any files at all. |
| */ |
| public boolean filesExist() { |
| String[] names = listFileNames(JE_SUFFIXES); |
| return (names.length != 0); |
| } |
| |
| /** |
| * Get the first or last file number in the set of JE files. |
| * |
| * @param first if true, get the first file, else get the last file |
| * @return the file number or null if no files exist |
| */ |
| private Long getFileNum(boolean first) { |
| String[] names = listFileNames(JE_SUFFIXES); |
| if (names.length == 0) { |
| return null; |
| } |
| int index = 0; |
| if (!first) { |
| index = names.length - 1; |
| } |
| return getNumFromName(names[index]); |
| } |
| |
| /** |
| * Get the data dir index from a file name. |
| * |
| * @return index into dbEnvDataDirs of this fileName's data directory. |
| * -1 if multiple data directories are not being used. |
| */ |
| private int getDataDirIndexFromName(String fileName) { |
| if (nDataDirs == 0) { |
| return -1; |
| } |
| |
| int dataDirEnd = fileName.lastIndexOf(File.separator); |
| String dataDir = fileName.substring(0, dataDirEnd); |
| return Integer.valueOf |
| (Integer.parseInt(dataDir.substring("data".length()))); |
| } |
| |
| /** |
| * Get the file number from a file name. |
| * |
| * @param fileName the file name |
| * @return the file number |
| */ |
| public Long getNumFromName(String fileName) { |
| String name = fileName; |
| if (nDataDirs != 0) { |
| name = name.substring(name.lastIndexOf(File.separator) + 1); |
| } |
| String fileNumber = name.substring(0, name.indexOf(".")); |
| return Long.valueOf(Long.parseLong(fileNumber, 16)); |
| } |
| |
| /** |
| * Find JE files. Return names sorted in ascending fashion. |
| * @param suffixes which type of file we're looking for |
| * @return array of file names |
| * |
| * Used by unit tests so package protection. |
| */ |
| String[] listFileNames(String[] suffixes) { |
| JEFileFilter fileFilter = new JEFileFilter(suffixes); |
| return listFileNamesInternal(fileFilter); |
| } |
| |
| /** |
| * Find .jdb files which are {@literal >=} the minimimum file number and |
| * {@literal <=} the maximum file number. |
| * Return names sorted in ascending fashion. |
| * |
| * @return array of file names |
| */ |
| public String[] listFileNames(long minFileNumber, long maxFileNumber) { |
| JEFileFilter fileFilter = |
| new JEFileFilter(JE_SUFFIXES, minFileNumber, maxFileNumber); |
| return listFileNamesInternal(fileFilter); |
| } |
| |
| private static Comparator<File> fileComparator = |
| new Comparator<File>() { |
| |
| private String getFileNum(File file) { |
| String fname = file.toString(); |
| return fname.substring(fname.indexOf(File.separator) + 1); |
| } |
| |
| public int compare(File o1, File o2) { |
| String fnum1 = getFileNum(o1); |
| String fnum2 = getFileNum(o2); |
| return o1.compareTo(o2); |
| } |
| }; |
| |
| private static Comparator<String> stringComparator = |
| new Comparator<String>() { |
| |
| private String getFileNum(String fname) { |
| return fname.substring(fname.indexOf(File.separator) + 1); |
| } |
| |
| public int compare(String o1, String o2) { |
| String fnum1 = getFileNum(o1); |
| String fnum2 = getFileNum(o2); |
| return fnum1.compareTo(fnum2); |
| } |
| }; |
| |
| /** |
| * Find JE files, flavor for unit test support. |
| * |
| * @param suffixes which type of file we're looking for |
| * @return array of file names |
| */ |
| public static String[] listFiles(File envDirFile, |
| String[] suffixes, |
| boolean envMultiSubDir) { |
| String[] names = envDirFile.list(new JEFileFilter(suffixes)); |
| |
| ArrayList<String> subFileNames = new ArrayList<String>(); |
| if (envMultiSubDir) { |
| for (File file : envDirFile.listFiles()) { |
| if (file.isDirectory() && file.getName().startsWith("data")) { |
| File[] subFiles = |
| file.listFiles(new JEFileFilter(suffixes)); |
| for (File subFile : subFiles) { |
| subFileNames.add(file.getName() + |
| File.separator + subFile.getName()); |
| } |
| } |
| } |
| |
| String[] totalFileNames = |
| new String[names.length + subFileNames.size()]; |
| for (int i = 0; i < totalFileNames.length; i++) { |
| if (i < names.length) { |
| totalFileNames[i] = names[i]; |
| } else { |
| totalFileNames[i] = subFileNames.get(i - names.length); |
| } |
| } |
| names = totalFileNames; |
| } |
| |
| if (names != null) { |
| Arrays.sort(names, stringComparator); |
| } else { |
| names = new String[0]; |
| } |
| |
| return names; |
| } |
| |
| public File[] listJDBFiles() { |
| if (nDataDirs == 0) { |
| return listJDBFilesInternalSingleDir(new JEFileFilter(JE_SUFFIXES)); |
| } else { |
| return listJDBFilesInternalMultiDir(new JEFileFilter(JE_SUFFIXES)); |
| } |
| } |
| |
| public File[] listJDBFilesInternalSingleDir(JEFileFilter fileFilter) { |
| File[] files = dbEnvHome.listFiles(fileFilter); |
| if (files != null) { |
| Arrays.sort(files); |
| } else { |
| files = new File[0]; |
| } |
| |
| return files; |
| } |
| |
| public File[] listJDBFilesInternalMultiDir(JEFileFilter fileFilter) { |
| File[][] files = new File[nDataDirs][]; |
| int nTotalFiles = 0; |
| int i = 0; |
| for (File envDir : dbEnvDataDirs) { |
| files[i] = envDir.listFiles(fileFilter); |
| nTotalFiles += files[i].length; |
| i++; |
| } |
| |
| if (nTotalFiles == 0) { |
| return new File[0]; |
| } |
| |
| File[] ret = new File[nTotalFiles]; |
| i = 0; |
| for (File[] envFiles : files) { |
| for (File envFile : envFiles) { |
| ret[i++] = envFile; |
| } |
| } |
| |
| Arrays.sort(ret, fileComparator); |
| return ret; |
| } |
| |
| private String[] listFileNamesInternal(JEFileFilter fileFilter) { |
| if (nDataDirs == 0) { |
| return listFileNamesInternalSingleDir(fileFilter); |
| } else { |
| return listFileNamesInternalMultiDirs(fileFilter); |
| } |
| } |
| |
| private String[] listFileNamesInternalSingleDir(JEFileFilter fileFilter) { |
| String[] fileNames = dbEnvHome.list(fileFilter); |
| if (fileNames != null) { |
| Arrays.sort(fileNames); |
| } else { |
| fileNames = new String[0]; |
| } |
| return fileNames; |
| } |
| |
| private String[] listFileNamesInternalMultiDirs(JEFileFilter filter) { |
| String[][] files = new String[nDataDirs][]; |
| int nTotalFiles = 0; |
| int i = 0; |
| for (File envDir : dbEnvDataDirs) { |
| files[i] = envDir.list(filter); |
| |
| String envDirName = envDir.toString(); |
| String dataDirName = envDirName. |
| substring(envDirName.lastIndexOf(File.separator) + 1); |
| |
| for (int j = 0; j < files[i].length; j += 1) { |
| files[i][j] = dataDirName + File.separator + files[i][j]; |
| } |
| |
| nTotalFiles += files[i].length; |
| i++; |
| } |
| |
| if (nTotalFiles == 0) { |
| return new String[0]; |
| } |
| |
| String[] ret = new String[nTotalFiles]; |
| i = 0; |
| for (String[] envFiles : files) { |
| for (String envFile : envFiles) { |
| ret[i++] = envFile; |
| } |
| } |
| |
| Arrays.sort(ret, stringComparator); |
| return ret; |
| } |
| |
| private void checkNoDataDirs() { |
| String[] dataDirNames = |
| dbEnvHome.list(new FilenameFilter() { |
| public boolean accept(File dir, String name) { |
| /* We'll validate the subdirNum later. */ |
| return name != null && |
| name.length() == "dataNNN".length() && |
| name.startsWith("data"); |
| } |
| } |
| ); |
| if (dataDirNames != null && dataDirNames.length != 0) { |
| throw EnvironmentFailureException.unexpectedState |
| (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() + |
| " was not set and expected to find no" + |
| " data directories, but found " + |
| dataDirNames.length + " data directories instead."); |
| } |
| } |
| |
    /**
     * Lists and validates the "dataNNN" sub-directories of the environment
     * home, for use when je.log.nDataDirectories is configured (non-zero).
     * Exactly nDataDirs directories must exist, numbered consecutively
     * starting at data001.
     *
     * @return the data directories, indexed by sub-directory number minus 1.
     *
     * @throws EnvironmentFailureException if the count is wrong, a number is
     * missing or duplicated, a suffix is non-numeric, or an entry is not a
     * directory.
     */
    public File[] gatherDataDirs() {
        String[] dataDirNames =
            dbEnvHome.list(new FilenameFilter() {
                public boolean accept(File dir, String name) {
                    /* We'll validate the subdirNum later. */
                    return name != null &&
                        name.length() == "dataNNN".length() &&
                        name.startsWith("data");
                }
            }
        );
        if (dataDirNames != null) {
            /* Zero-padded names sort lexically into numeric order. */
            Arrays.sort(dataDirNames);
        } else {
            /* File.list returns null when dbEnvHome is not a directory. */
            dataDirNames = new String[0];
        }

        if (dataDirNames.length != nDataDirs) {
            throw EnvironmentFailureException.unexpectedState
                (EnvironmentParams.LOG_N_DATA_DIRECTORIES.getName() +
                 " was set and expected to find " + nDataDirs +
                 " data directories, but found " +
                 dataDirNames.length + " instead.");
        }

        /* ddNum is the expected (1-based) sub-directory number. */
        int ddNum = 1;
        File[] dataDirs = new File[nDataDirs];
        for (String fn : dataDirNames) {
            /* Strip the "data" prefix, leaving the numeric portion. */
            String subdirNumStr = fn.substring(4);
            try {
                int subdirNum = Integer.parseInt(subdirNumStr);
                if (subdirNum != ddNum) {
                    /* A gap or duplicate in the numbering sequence. */
                    throw EnvironmentFailureException.unexpectedState
                        ("Expected to find data subdir: data" +
                         paddedDirNum(ddNum) +
                         " but found data" +
                         subdirNumStr + " instead.");

                }

                File dataDir = new File(dbEnvHome, fn);
                if (!dataDir.exists()) {
                    throw EnvironmentFailureException.unexpectedState
                        ("Data dir: " + dataDir + " doesn't exist.");
                }
                if (!dataDir.isDirectory()) {
                    throw EnvironmentFailureException.unexpectedState
                        ("Data dir: " + dataDir + " is not a directory.");
                }
                dataDirs[ddNum - 1] = dataDir;
            } catch (NumberFormatException E) {
                throw EnvironmentFailureException.unexpectedState
                    ("Illegal data subdir: data" + subdirNumStr);
            }
            ddNum++;
        }
        return dataDirs;
    }
| |
| private String paddedDirNum(int dirNum) { |
| String paddedStr = "000" + dirNum; |
| int len = paddedStr.length(); |
| return paddedStr.substring(len - 3); |
| } |
| |
| /** |
| * @return the full file name and path for the nth JE file. |
| */ |
| String[] getFullFileNames(long fileNum) { |
| if (includeDeletedFiles) { |
| int nSuffixes = JE_AND_DEL_SUFFIXES.length; |
| String[] ret = new String[nSuffixes]; |
| for (int i = 0; i < nSuffixes; i++) { |
| ret[i] = getFullFileName(fileNum, JE_AND_DEL_SUFFIXES[i]); |
| } |
| return ret; |
| } |
| return new String[] { getFullFileName(fileNum, JE_SUFFIX) }; |
| } |
| |
| private File getDataDir(long fileNum) { |
| return (nDataDirs == 0) ? |
| dbEnvHome : |
| dbEnvDataDirs[((int) (fileNum % nDataDirs))]; |
| } |
| |
    /** @return the full file name and path for the nth log (*.jdb) file. */
    public String getFullFileName(long fileNum) {
        return getFullFileName(fileNum, JE_SUFFIX);
    }
| |
| /** |
| * @return the full file name and path for this file name. |
| */ |
| public String getFullFileName(long fileNum, String suffix) { |
| File dbEnvDataDir = getDataDir(fileNum); |
| return dbEnvDataDir + File.separator + getFileName(fileNum, suffix); |
| } |
| |
| /** |
| * @return the file name relative to the env home directory. |
| */ |
| public String getPartialFileName(long fileNum) { |
| String name = getFileName(fileNum, JE_SUFFIX); |
| if (nDataDirs == 0) { |
| return name; |
| } |
| File dataDir = getDataDir(fileNum); |
| return dataDir.getName() + File.separator + name; |
| } |
| |
| /* |
| * Return the full file name of a specified log file name, including the |
| * sub directories names if needed. |
| */ |
| public String getFullFileName(String fileName) { |
| final int suffixStartPos = fileName.indexOf("."); |
| String suffix = fileName.substring(suffixStartPos, fileName.length()); |
| assert suffix != null; |
| String fileNum = fileName.substring(0, suffixStartPos); |
| |
| return getFullFileName |
| (Long.valueOf(Long.parseLong(fileNum, 16)), suffix); |
| } |
| |
| /** |
| * @return the file name for the nth file. |
| */ |
| public static String getFileName(long fileNum, String suffix) { |
| return (getFileNumberString(fileNum) + suffix); |
| } |
| |
    /** @return the file name for the nth log (*.jdb) file. */
    public static String getFileName(long fileNum) {
        return getFileName(fileNum, JE_SUFFIX);
    }
| |
| /** |
| * HexFormatter generates a 0 padded string starting with 0x. We want |
| * the right most 8 digits, so start at 10. |
| */ |
| private static String getFileNumberString(long fileNum) { |
| return HexFormatter.formatLong(fileNum).substring(10); |
| } |
| |
| /** |
| * @return true if successful, false if File.renameTo returns false, which |
| * can occur on Windows if the file was recently closed. |
| */ |
| public boolean renameFile(final long fileNum, final String newSuffix) |
| throws IOException, DatabaseException { |
| |
| return renameFile(fileNum, newSuffix, null) != null; |
| } |
| |
| /** |
| * Rename this file to NNNNNNNN.suffix. If that file already exists, try |
| * NNNNNNNN.suffix.1, etc. Used for deleting files or moving corrupt files |
| * aside. |
| * |
| * @param fileNum the file we want to move |
| * |
| * @param newSuffix the new file suffix |
| * |
| * @param subDir the data directory sub-directory to rename the file into. |
| * The subDir must already exist. May be null to leave the file in its |
| * current data directory. |
| * |
| * @return renamed File if successful, or null if File.renameTo returns |
| * false, which can occur on Windows if the file was recently closed. |
| */ |
| public File renameFile(final long fileNum, |
| final String newSuffix, |
| final String subDir) |
| throws IOException { |
| |
| final File oldDir = getDataDir(fileNum); |
| final String oldName = getFileName(fileNum); |
| final File oldFile = new File(oldDir, oldName); |
| |
| final File newDir = |
| (subDir != null) ? (new File(oldDir, subDir)) : oldDir; |
| |
| final String newName = getFileName(fileNum, newSuffix); |
| |
| String generation = ""; |
| int repeatNum = 0; |
| |
| while (true) { |
| final File newFile = new File(newDir, newName + generation); |
| |
| if (newFile.exists()) { |
| repeatNum++; |
| generation = "." + repeatNum; |
| continue; |
| } |
| |
| /* |
| * If CLEANER_EXPUNGE is false, then the cleaner will rename |
| * the .jdb file. The rename action will first delete the |
| * old file and then create the new file. So we should also |
| * record the file rename action here. |
| */ |
| if (fdd != null) { |
| if (oldName.endsWith(FileManager.JE_SUFFIX)) { |
| fdd.addDeletedFile(oldName); |
| } |
| } |
| |
| clearFileCache(fileNum); |
| |
| synchronized (fileUpdateMutex) { |
| final boolean success = oldFile.renameTo(newFile); |
| return success ? newFile : null; |
| } |
| } |
| } |
| |
| /** |
| * Delete log file NNNNNNNN. |
| * |
| * @param fileNum the file we want to move |
| * |
| * @return true if successful, false if File.delete returns false, which |
| * can occur on Windows if the file was recently closed. |
| */ |
| public boolean deleteFile(final long fileNum) |
| throws IOException, DatabaseException { |
| |
| final String fileName = getFullFileNames(fileNum)[0]; |
| |
| /* |
| * Add files deleted by JE to filesDeletedByJE in fdd, which aims to |
| * check whether a deleted file is deleted by JE or by users wrongly. |
| * |
| * The file name gotten from WatchKey is the relative file name, |
| * so we should also get the relative file name here. |
| */ |
| if (fdd != null) { |
| if (fileName.endsWith(FileManager.JE_SUFFIX)) { |
| final int index = fileName.lastIndexOf(File.separator) + 1; |
| final String relativeFileName = fileName.substring(index); |
| fdd.addDeletedFile(relativeFileName); |
| } |
| } |
| |
| clearFileCache(fileNum); |
| final File file = new File(fileName); |
| synchronized (fileUpdateMutex) { |
| return file.delete(); |
| } |
| } |
| |
| /** |
| * Opens an existing file for read-write access. Used in exceptional |
| * circumstances where writing to an existing file is acceptable, but the |
| * file may be deleted or renamed concurrently by other threads. The |
| * latter can occur only if the file is not currently {@link |
| * com.sleepycat.je.cleaner.FileProtector protected}. |
| * |
| * @see #fileUpdateMutex |
| */ |
| public RandomAccessFile openFileReadWrite(final String fullPath) |
| throws FileNotFoundException { |
| |
| final File file = new File(fullPath); |
| |
| synchronized (fileUpdateMutex) { |
| |
| if (!file.exists()) { |
| throw new FileNotFoundException( |
| file.toString() + " does not exist"); |
| } |
| |
| return new RandomAccessFile( |
| file, |
| getAppropriateReadWriteMode().getModeValue()); |
| } |
| } |
| |
| /** |
| * Returns the log version for the given file. |
| */ |
| public int getFileLogVersion(long fileNum) |
| throws DatabaseException { |
| |
| try { |
| FileHandle handle = getFileHandle(fileNum); |
| int logVersion = handle.getLogVersion(); |
| handle.release(); |
| return logVersion; |
| } catch (FileNotFoundException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, e); |
| } catch (ChecksumException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e); |
| } |
| } |
| |
| /** |
| * Return a read only file handle that corresponds to this file number. |
| * Retrieve it from the cache or open it anew and validate the file header. |
| * This method takes a latch on this file, so that the file descriptor will |
| * be held in the cache as long as it's in use. When the user is done with |
| * the file, the latch must be released. |
| * |
| * @param fileNum which file |
| * @return the file handle for the existing or newly created file |
| */ |
| public FileHandle getFileHandle(long fileNum) |
| throws FileNotFoundException, ChecksumException, DatabaseException { |
| |
| /* Check the file cache for this file. */ |
| Long fileId = Long.valueOf(fileNum); |
| FileHandle fileHandle = null; |
| |
| /** |
| * Loop until we get an open FileHandle. |
| */ |
| try { |
| while (true) { |
| |
| /* |
| * The file cache is intentionally not latched here so that |
| * it's not a bottleneck in the fast path. We check that the |
| * file handle that we get back is really still open after we |
| * latch it down below. |
| */ |
| fileHandle = fileCache.get(fileId); |
| |
| /* |
| * If the file isn't in the cache, latch the cache and check |
| * again. Under the latch, if the file is not in the cache we |
| * add it to the cache but do not open the file yet. We latch |
| * the handle here, and open the file further below after |
| * releasing the cache latch. This prevents blocking other |
| * threads that are opening other files while we open this |
| * file. The latch on the handle blocks other threads waiting |
| * to open the same file, which is necessary. |
| */ |
| boolean newHandle = false; |
| if (fileHandle == null) { |
| synchronized (fileCache) { |
| fileHandle = fileCache.get(fileId); |
| if (fileHandle == null) { |
| newHandle = true; |
| fileHandle = addFileHandle(fileId); |
| } |
| } |
| } |
| |
| if (newHandle) { |
| |
| /* |
| * Open the file with the fileHandle latched. It was |
| * latched by addFileHandle above. |
| */ |
| boolean success = false; |
| try { |
| openFileHandle(fileHandle, FileMode.READ_MODE, |
| null /*existingHandle*/); |
| success = true; |
| } finally { |
| if (!success) { |
| /* An exception is in flight -- clean up. */ |
| fileHandle.release(); |
| clearFileCache(fileNum); |
| } |
| } |
| } else { |
| |
| /* |
| * The handle was found in the cache. Latch the fileHandle |
| * before checking getFile below and returning. |
| */ |
| if (!fileHandle.latchNoWait()) { |
| |
| /* |
| * But the handle was latched. Rather than wait, let's |
| * just make a new transient handle. It doesn't need |
| * to be latched, but it does need to be closed. |
| */ |
| final FileHandle existingHandle = fileHandle; |
| fileHandle = new FileHandle( |
| envImpl, fileId, getFileNumberString(fileId)) { |
| @Override |
| public void release() |
| throws DatabaseException { |
| |
| try { |
| close(); |
| } catch (IOException E) { |
| // Ignore |
| } |
| } |
| }; |
| |
| openFileHandle(fileHandle, FileMode.READ_MODE, |
| existingHandle); |
| } |
| } |
| |
| /* |
| * We may have obtained this file handle outside the file cache |
| * latch, so we have to test that the handle is still valid. |
| * If it's not, then loop back and try again. |
| */ |
| if (fileHandle.getFile() == null) { |
| fileHandle.release(); |
| } else { |
| break; |
| } |
| } |
| } catch (FileNotFoundException e) { |
| /* Handle at higher levels. */ |
| throw e; |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_READ, e); |
| } |
| |
| return fileHandle; |
| } |
| |
| /** |
| * Creates a new FileHandle and adds it to the cache, but does not open |
| * the file. |
| * @return the latched FileHandle. |
| */ |
| private FileHandle addFileHandle(Long fileNum) |
| throws IOException, DatabaseException { |
| |
| FileHandle fileHandle = |
| new FileHandle(envImpl, fileNum, getFileNumberString(fileNum)); |
| fileCache.add(fileNum, fileHandle); |
| fileHandle.latch(); |
| return fileHandle; |
| } |
| |
| public FileMode getAppropriateReadWriteMode() { |
| if (useODSYNC) { |
| return FileMode.READWRITE_ODSYNC_MODE; |
| } |
| return FileMode.READWRITE_MODE; |
| } |
| |
| /** |
| * Creates a new handle and opens it. Does not add the handle to the |
| * cache. |
| */ |
| private FileHandle makeFileHandle(long fileNum, FileMode mode) |
| throws FileNotFoundException, ChecksumException { |
| |
| FileHandle fileHandle = |
| new FileHandle(envImpl, fileNum, getFileNumberString(fileNum)); |
| openFileHandle(fileHandle, mode, null /*existingHandle*/); |
| return fileHandle; |
| } |
| |
| /** |
| * Opens the file for the given handle and initializes it. |
| * |
| * @param existingHandle is an already open handle for the same file or |
| * null. If non-null it is used to avoid the cost of reading the file |
| * header. |
| */ |
| private void openFileHandle(FileHandle fileHandle, |
| FileMode mode, |
| FileHandle existingHandle) |
| throws FileNotFoundException, ChecksumException { |
| |
| nFileOpens.increment(); |
| long fileNum = fileHandle.getFileNum(); |
| String[] fileNames = getFullFileNames(fileNum); |
| RandomAccessFile newFile = null; |
| String fileName = null; |
| boolean success = false; |
| try { |
| |
| /* |
| * Open the file. Note that we are going to try a few names to open |
| * this file -- we'll try for N.jdb, and if that doesn't exist and |
| * we're configured to look for all types, we'll look for N.del. |
| */ |
| FileNotFoundException FNFE = null; |
| for (String fileName2 : fileNames) { |
| fileName = fileName2; |
| try { |
| newFile = fileFactory.createFile(dbEnvHome, fileName, |
| mode.getModeValue()); |
| break; |
| } catch (FileNotFoundException e) { |
| /* Save the first exception thrown. */ |
| if (FNFE == null) { |
| FNFE = e; |
| } |
| } |
| } |
| |
| /* |
| * If we didn't find the file or couldn't create it, rethrow the |
| * exception. |
| */ |
| if (newFile == null) { |
| assert FNFE != null; |
| throw FNFE; |
| } |
| |
| /* |
| * If there is an existing open handle, there is no need to read or |
| * validate the header. Note that the log version is zero if the |
| * existing handle is not fully initialized. |
| */ |
| if (existingHandle != null) { |
| final int logVersion = existingHandle.getLogVersion(); |
| if (logVersion > 0) { |
| fileHandle.init(newFile, logVersion); |
| success = true; |
| return; |
| } |
| } |
| |
| int logVersion = LogEntryType.LOG_VERSION; |
| |
| if (newFile.length() == 0) { |
| |
| /* |
| * If the file is empty, reinitialize it if we can. If not, |
| * send the file handle back up; the calling code will deal |
| * with the fact that there's nothing there. |
| */ |
| if (mode.isWritable()) { |
| /* An empty file, write a header. */ |
| long lastLsn = DbLsn.longToLsn(perFileLastUsedLsn.remove |
| (Long.valueOf(fileNum - 1))); |
| long headerPrevOffset = 0; |
| if (lastLsn != DbLsn.NULL_LSN) { |
| headerPrevOffset = DbLsn.getFileOffset(lastLsn); |
| } |
| if ((headerPrevOffset == 0) && |
| (fileNum > 1) && |
| syncAtFileEnd) { |
| /* Get more info if this happens again. [#20732] */ |
| throw EnvironmentFailureException.unexpectedState |
| (envImpl, |
| "Zero prevOffset fileNum=0x" + |
| Long.toHexString(fileNum) + |
| " lastLsn=" + DbLsn.getNoFormatString(lastLsn) + |
| " perFileLastUsedLsn=" + perFileLastUsedLsn + |
| " fileLen=" + newFile.length()); |
| } |
| FileHeader fileHeader = |
| new FileHeader(fileNum, headerPrevOffset); |
| writeFileHeader(newFile, fileName, fileHeader, fileNum); |
| } |
| } else { |
| /* A non-empty file, check the header */ |
| logVersion = |
| readAndValidateFileHeader(newFile, fileName, fileNum); |
| } |
| fileHandle.init(newFile, logVersion); |
| success = true; |
| } catch (FileNotFoundException e) { |
| /* Handle at higher levels. */ |
| throw e; |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_READ, |
| "Couldn't open file " + fileName, e); |
| } catch (DatabaseException e) { |
| |
| /* |
| * Let this exception go as a checksum exception, so it sets the |
| * run recovery state correctly. |
| */ |
| closeFileInErrorCase(newFile); |
| e.addErrorMessage("Couldn't open file " + fileName); |
| throw e; |
| } finally { |
| if (!success) { |
| closeFileInErrorCase(newFile); |
| } |
| } |
| } |
| |
| /** |
| * Close this file and eat any exceptions. Used in catch clauses. |
| */ |
| private void closeFileInErrorCase(RandomAccessFile file) { |
| try { |
| if (file != null) { |
| file.close(); |
| } |
| } catch (Exception e) { |
| } |
| } |
| |
| /** |
| * Read the given JE log file and validate the header. |
| * |
| * @throws DatabaseException if the file header isn't valid |
| * |
| * @return file header log version. |
| */ |
| private int readAndValidateFileHeader(RandomAccessFile file, |
| String fileName, |
| long fileNum) |
| throws ChecksumException, DatabaseException { |
| |
| /* |
| * Read the file header from this file. It's always the first log |
| * entry. |
| * |
| * The special UNKNOWN_FILE_HEADER_VERSION value is passed for reading |
| * the entry header. The actual log version is read as part of the |
| * FileHeader entry. [#16939] |
| */ |
| LogManager logManager = envImpl.getLogManager(); |
| try { |
| LogEntry headerEntry = logManager.getLogEntryAllowChecksumException |
| (DbLsn.makeLsn(fileNum, 0), file, |
| LogEntryType.UNKNOWN_FILE_HEADER_VERSION); |
| FileHeader header = (FileHeader) headerEntry.getMainItem(); |
| return header.validate(envImpl, fileName, fileNum); |
| |
| } catch (ErasedException e) { |
| throw new ChecksumException( |
| "File header is erased, likely corrupt", e); |
| } |
| } |
| |
| /** |
| * Write a proper file header to the given file. |
| */ |
| private void writeFileHeader(RandomAccessFile file, |
| String fileName, |
| FileHeader header, |
| long fileNum) |
| throws DatabaseException { |
| |
| /* Fail loudly if the environment is invalid. */ |
| envImpl.checkIfInvalid(); |
| |
| /* |
| * Fail silent if the environment is not open. |
| */ |
| if (envImpl.mayNotWrite()) { |
| return; |
| } |
| |
| /* Write file header into this buffer in the usual log entry format. */ |
| LogEntry headerLogEntry = |
| new FileHeaderEntry(LogEntryType.LOG_FILE_HEADER, header); |
| ByteBuffer headerBuf = envImpl.getLogManager(). |
| putIntoBuffer(headerLogEntry, |
| 0); // prevLogEntryOffset |
| |
| /* Write the buffer into the channel. */ |
| int bytesWritten; |
| try { |
| if (LOGWRITE_EXCEPTION_TESTING) { |
| generateLogWriteException(file, headerBuf, 0, fileNum); |
| } |
| |
| /* |
| * Always flush header so that file.length() will be non-zero when |
| * this method returns and two threads won't attempt to create the |
| * header. [#20732] |
| */ |
| bytesWritten = writeToFile(file, headerBuf, 0, fileNum, |
| true /*flushRequired*/); |
| |
| } catch (ClosedChannelException e) { |
| |
| /* |
| * The channel should never be closed. It may be closed because |
| * of an interrupt received by another thread. See SR [#10463] |
| */ |
| throw new ThreadInterruptedException |
| (envImpl, "Channel closed, may be due to thread interrupt", e); |
| } catch (IOException e) { |
| /* Possibly an out of disk exception. */ |
| throw new LogWriteException(envImpl, e); |
| } |
| |
| if (bytesWritten != headerLogEntry.getSize() + |
| LogEntryHeader.MIN_HEADER_SIZE) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, |
| "File " + fileName + |
| " was created with an incomplete header. Only " + |
| bytesWritten + " bytes were written."); |
| } |
| } |
| |
| /** |
| * @return the prevOffset field stored in the file header. |
| */ |
| public long getFileHeaderPrevOffset(long fileNum) |
| throws FileNotFoundException, ChecksumException { |
| |
| return readHeader(fileNum).getLastEntryInPrevFileOffset(); |
| } |
| |
| /** |
| * @return the time field stored in the file header, which is effectively |
| * the file's creation time. |
| */ |
| public Timestamp getFileHeaderTimestamp(long fileNum) |
| throws FileNotFoundException, ChecksumException { |
| |
| return readHeader(fileNum).getTimestamp(); |
| } |
| |
    /**
     * Reads the FileHeader entry at offset 0 of the given file via the
     * LogManager.
     *
     * @throws FileNotFoundException if the file does not exist.
     * @throws ChecksumException if the entry is corrupt or erased.
     */
    private FileHeader readHeader(long fileNum)
        throws FileNotFoundException, ChecksumException {

        try {
            LogEntry headerEntry =
                envImpl.getLogManager().getLogEntryAllowChecksumException
                (DbLsn.makeLsn(fileNum, 0));
            return (FileHeader) headerEntry.getMainItem();
        } catch (ErasedException e) {
            /* An erased header is reported as a checksum failure. */
            throw new ChecksumException(
                "File header is erased, implied corruption", e);
        }
    }
| |
| /* |
| * Support for writing new log entries |
| */ |
| |
| /** |
| * Returns whether we should flip files to log an entry of 'size' bytes. |
| */ |
| boolean shouldFlipFile(long size) { |
| return forceNewFile || |
| (DbLsn.getFileOffset(nextAvailableLsn) + size) > maxFileSize; |
| } |
| |
| /** |
| * Calculates LSN of next entry to be logged. |
| */ |
| long calculateNextLsn(boolean flippedFile) { |
| return flippedFile ? |
| DbLsn.makeLsn( |
| currentFileNum + 1, |
| FileManager.firstLogEntryOffset()) : |
| nextAvailableLsn; |
| } |
| |
| /** |
| * Advance LSN position after determining the LSN of an entry to be logged. |
| * |
| * <p>When flippedFile is true, this method must be called after flushing |
| * the prior file. We guarantee that certain volatile LSN fields |
| * (currentFileNumber, nextAvailableLsn, lastUsedLsn) are not updated until |
| * after flushing the prior file.</p> |
| * |
| * @param currentLsn value returned by {@link #calculateNextLsn} |
| * |
| * @param size value passed to {@link #shouldFlipFile} |
| * |
| * @param flippedFile value returned by {@link #shouldFlipFile}. |
| * |
| * @return the file offset of the previous LSN that was used. Needed for |
| * constructing the header of the log entry for currentLsn. If the previous |
| * LSN was in the previous file, or this is the very first LSN of the env, |
| * zero is returned. |
| */ |
| long advanceLsn(long currentLsn, long size, boolean flippedFile) { |
| |
| final long prevOffset; |
| |
| if (flippedFile) { |
| assert DbLsn.getFileNumber(currentLsn) == currentFileNum + 1; |
| assert DbLsn.getFileOffset(currentLsn) == firstLogEntryOffset(); |
| |
| perFileLastUsedLsn.put(currentFileNum, lastUsedLsn); |
| currentFileNum += 1; |
| prevOffset = 0; |
| } else { |
| assert DbLsn.getFileNumber(currentLsn) == currentFileNum; |
| |
| prevOffset = (lastUsedLsn == DbLsn.NULL_LSN) ? |
| 0 : DbLsn.getFileOffset(lastUsedLsn); |
| } |
| |
| forceNewFile = false; |
| lastUsedLsn = currentLsn; |
| |
| nextAvailableLsn = DbLsn.makeLsn( |
| currentFileNum, |
| DbLsn.getFileOffset(currentLsn) + size); |
| |
| return prevOffset; |
| } |
| |
| /** |
| * Write out a log buffer to the file. |
| * @param fullBuffer buffer to write |
| * @param flushWriteQueue true if this write can not be queued on the |
| * Write Queue. |
| */ |
| void writeLogBuffer(LogBuffer fullBuffer, boolean flushWriteQueue) |
| throws DatabaseException { |
| |
| /* Fail loudly if the environment is invalid. */ |
| envImpl.checkIfInvalid(); |
| |
| /* |
| * Fail silent if the environment is not open. |
| */ |
| if (envImpl.mayNotWrite()) { |
| return; |
| } |
| |
| /* Use the LSN to figure out what file to write this buffer to. */ |
| long firstLsn = fullBuffer.getFirstLsn(); |
| |
| /* |
| * Is there anything in this write buffer? We could have been called by |
| * the environment shutdown, and nothing is actually in the buffer. |
| */ |
| if (firstLsn != DbLsn.NULL_LSN) { |
| |
| RandomAccessFile file = |
| endOfLog.getWritableFile(DbLsn.getFileNumber(firstLsn), true); |
| ByteBuffer data = fullBuffer.getDataBuffer(); |
| |
| try { |
| |
| /* |
| * Check that we do not overwrite unless the file only contains |
| * a header [#11915] [#12616]. |
| */ |
| assert fullBuffer.getRewriteAllowed() || |
| (DbLsn.getFileOffset(firstLsn) >= file.length() || |
| file.length() == firstLogEntryOffset()) : |
| "FileManager would overwrite non-empty file 0x" + |
| Long.toHexString(DbLsn.getFileNumber(firstLsn)) + |
| " lsnOffset=0x" + |
| Long.toHexString(DbLsn.getFileOffset(firstLsn)) + |
| " fileLength=0x" + |
| Long.toHexString(file.length()); |
| |
| if (LOGWRITE_EXCEPTION_TESTING) { |
| generateLogWriteException |
| (file, data, DbLsn.getFileOffset(firstLsn), |
| DbLsn.getFileNumber(firstLsn)); |
| } |
| writeToFile(file, data, DbLsn.getFileOffset(firstLsn), |
| DbLsn.getFileNumber(firstLsn), |
| flushWriteQueue); |
| } catch (ClosedChannelException e) { |
| |
| /* |
| * The file should never be closed. It may be closed because |
| * of an interrupt received by another thread. See SR [#10463]. |
| */ |
| throw new ThreadInterruptedException |
| (envImpl, "File closed, may be due to thread interrupt", |
| e); |
| } catch (IOException e) { |
| throw new LogWriteException(envImpl, e); |
| } |
| |
| assert EnvironmentImpl.maybeForceYield(); |
| } |
| } |
| |
| /** |
| * Write a buffer to a file at a given offset. |
| */ |
| private int writeToFile(RandomAccessFile file, |
| ByteBuffer data, |
| long destOffset, |
| long fileNum, |
| boolean flushWriteQueue) |
| throws IOException, DatabaseException { |
| |
| bumpWriteCount("write"); |
| |
| int pos = data.position(); |
| int size = data.limit() - pos; |
| |
| if (lastFileNumberTouched == fileNum && |
| (Math.abs(destOffset - lastFileTouchedOffset) < |
| ADJACENT_TRACK_SEEK_DELTA)) { |
| nSequentialWrites.increment(); |
| nSequentialWriteBytes.add(size); |
| } else { |
| nRandomWrites.increment(); |
| nRandomWriteBytes.add(size); |
| } |
| |
| if (VERIFY_CHECKSUMS) { |
| verifyChecksums(data, destOffset, "pre-write"); |
| } |
| |
| /* |
| * Perform a RandomAccessFile write and update the buffer position. |
| * ByteBuffer.array() is safe to use since all non-direct ByteBuffers |
| * have a backing array. |
| * |
| * Synchronization on the file object is needed because two threads may |
| * call seek() on the same file object. |
| * |
| * If the Write Queue is enabled, attempt to get the fsync latch. If |
| * we can't get it, then an fsync or write is in progress and we'd |
| * block anyway. In that case, queue the write operation. |
| */ |
| boolean fsyncLatchAcquired = |
| endOfLog.fsyncFileSynchronizer.tryLock(); |
| boolean enqueueSuccess = false; |
| if (!fsyncLatchAcquired && |
| useWriteQueue && |
| !flushWriteQueue) { |
| enqueueSuccess = |
| endOfLog.enqueueWrite(fileNum, data.array(), destOffset, |
| pos + data.arrayOffset(), size); |
| } |
| |
| if (!enqueueSuccess) { |
| if (!fsyncLatchAcquired) { |
| endOfLog.fsyncFileSynchronizer.lock(); |
| } |
| try { |
| if (useWriteQueue) { |
| endOfLog.dequeuePendingWrites1(); |
| } |
| |
| synchronized (file) { |
| |
| file.seek(destOffset); |
| file.write(data.array(), pos + data.arrayOffset(), size); |
| |
| if (VERIFY_CHECKSUMS) { |
| file.seek(destOffset); |
| file.read( |
| data.array(), pos + data.arrayOffset(), size); |
| verifyChecksums(data, destOffset, "post-write"); |
| } |
| } |
| } finally { |
| endOfLog.fsyncFileSynchronizer.unlock(); |
| } |
| } |
| data.position(pos + size); |
| |
| lastFileNumberTouched = fileNum; |
| lastFileTouchedOffset = destOffset + size; |
| return size; |
| } |
| |
    /**
     * Testing hook: counts every physical write and, when the counter enters
     * the configured window [STOP_ON_WRITE_COUNT, STOP_ON_WRITE_COUNT +
     * N_BAD_WRITES), either throws an IOException (THROW_ON_WRITE) or halts
     * the JVM to simulate a crash.
     */
    private void bumpWriteCount(final String debugMsg)
        throws IOException {

        if (DEBUG) {
            System.out.println("Write: " + WRITE_COUNT + " " + debugMsg);
        }

        if (++WRITE_COUNT >= STOP_ON_WRITE_COUNT &&
            WRITE_COUNT < (STOP_ON_WRITE_COUNT + N_BAD_WRITES)) {
            if (THROW_ON_WRITE) {
                throw new IOException
                    ("IOException generated for testing: " + WRITE_COUNT +
                     " " + debugMsg);
            }
            /* Simulate a hard crash: halt without running shutdown hooks. */
            Runtime.getRuntime().halt(0xff);
        }
    }
| |
| /** |
| * Read a buffer from a file at a given offset. We know that the desired |
| * data exists in this file. There's no need to incur extra costs |
| * such as checks of the file length, nor to return status as to whether |
| * this file contains the data. |
| */ |
| void readFromFile(RandomAccessFile file, |
| ByteBuffer readBuffer, |
| long offset, |
| long fileNo) |
| throws DatabaseException { |
| readFromFile(file, readBuffer, offset, fileNo, |
| true /* dataKnownToBeInFile */); |
| } |
| |
| /** |
| * Read a buffer from a file at a given offset. |
| * |
| * @return true if the read buffer is filled, false, if there is nothing |
| * left in the file to read |
| */ |
| boolean readFromFile(RandomAccessFile file, |
| ByteBuffer readBuffer, |
| long offset, |
| long fileNo, |
| boolean dataKnownToBeInFile) |
| throws DatabaseException { |
| |
| /* |
| * All IOExceptions on read turn into EnvironmentFailureExceptions |
| * [#15768]. |
| */ |
| try { |
| |
| /* |
| * Check if there's a pending write(s) in the write queue for this |
| * fileNo/offset and if so, use it to fulfill this read request. |
| */ |
| if (useWriteQueue && |
| endOfLog.checkWriteCache(readBuffer, offset, fileNo)) { |
| return true; |
| } |
| |
| /* |
| * Nothing queued, all data for this file must be in the file. |
| * Note that there's no synchronization between the check of the |
| * write queue above, and this check of file length. It's possible |
| * that a newly written log entry could show up between the |
| * statements, and enter the write queue just after we finish the |
| * check. |
| * |
| * Because of this, callers of this method must abide by one of |
| * three conditions: |
| * 1. They guarantee that the attempt to read a chunk of new data |
| * comes after the new data has been logged by the LogManager. |
| * 2. The files are quiescent when the read is going on. |
| * 3. The caller is sure the data is in this file. |
| * |
| * The replication feeder reader abides by (1) while all other file |
| * readers abide by (2). Callers which are fetching specific log |
| * entries fall under (3). |
| */ |
| boolean readThisFile = true; |
| if (!dataKnownToBeInFile) { |
| |
| /* |
| * Callers who are not sure whether the desired data is in this |
| * file or the next incur the cost of a check of file.length(), |
| * which is a system call. |
| */ |
| readThisFile = (offset < file.length()); |
| } |
| |
| if (readThisFile) { |
| readFromFileInternal(file, readBuffer, offset, fileNo); |
| return true; |
| } |
| |
| return false; |
| } catch (ClosedChannelException e) { |
| |
| /* |
| * The channel should never be closed. It may be closed because |
| * of an interrupt received by another thread. See SR [#10463] |
| */ |
| throw new ThreadInterruptedException |
| (envImpl, "Channel closed, may be due to thread interrupt", e); |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_READ, e); |
| } |
| } |
| |
    /**
     * Performs the physical read at the given offset and updates the
     * sequential/random read statistics and the buffer position.
     */
    private void readFromFileInternal(RandomAccessFile file,
                                      ByteBuffer readBuffer,
                                      long offset,
                                      long fileNum)
        throws IOException {

        /*
         * Perform a RandomAccessFile read and update the buffer position.
         * ByteBuffer.array() is safe to use since all non-direct ByteBuffers
         * have a backing array. Synchronization on the file object is needed
         * because two threads may call seek() on the same file object.
         */
        synchronized (file) {
            int pos = readBuffer.position();
            int size = readBuffer.limit() - pos;

            /* Classify the read as sequential or random for the stats. */
            if (lastFileNumberTouched == fileNum &&
                (Math.abs(offset - lastFileTouchedOffset) <
                 ADJACENT_TRACK_SEEK_DELTA)) {
                nSequentialReads.increment();
                nSequentialReadBytes.add(size);
            } else {
                nRandomReads.increment();
                nRandomReadBytes.add(size);
            }

            file.seek(offset);

            int bytesRead = file.read(readBuffer.array(),
                                      pos + readBuffer.arrayOffset(),
                                      size);
            if (bytesRead > 0) {
                readBuffer.position(pos + bytesRead);
            }

            /*
             * NOTE(review): if read() hits EOF it returns -1, making
             * lastFileTouchedOffset = offset - 1 below. Presumably callers
             * guarantee the region exists (see readFromFile) -- confirm.
             */
            lastFileNumberTouched = fileNum;
            lastFileTouchedOffset = offset + bytesRead;
        }
    }
| |
    /**
     * Debugging aid: prints one line per log record found in the buffer,
     * starting at the buffer's current position and the given starting LSN.
     * Restores the buffer position before returning.
     */
    private void printLogBuffer(ByteBuffer entryBuffer, long lsn) {

        int curPos = entryBuffer.position();

        while (entryBuffer.remaining() > 0) {

            int recStartPos = entryBuffer.position();

            LogEntryHeader header = null;

            try {
                /* The buffer contains current-version entry headers. */
                header = new LogEntryHeader(
                    entryBuffer, LogEntryType.LOG_VERSION, lsn);
            } catch (ChecksumException e) {
                System.err.println("ChecksumException in printLogBuffer " + e);
                break;
            }

            /*
             * NOTE(review): findType presumably returns non-null for every
             * type present in a write buffer; an unknown type would NPE on
             * the next line -- confirm.
             */
            LogEntryType recType = LogEntryType.findType(header.getType());
            int recSize = header.getSize() + header.getItemSize();

            System.out.println(
                "LOGREC " + recType.toStringNoVersion() +
                " at LSN " + DbLsn.toString(lsn) +
                " , log buffer offset " + recStartPos);

            /* Advance the LSN and buffer position past this record. */
            lsn += recSize;

            entryBuffer.position(recStartPos + recSize);
        }

        entryBuffer.position(curPos);
    }
| |
| private void verifyChecksums(ByteBuffer entryBuffer, |
| long lsn, |
| String comment) |
| throws IOException { |
| |
| int curPos = entryBuffer.position(); |
| try { |
| while (entryBuffer.remaining() > 0) { |
| int recStartPos = entryBuffer.position(); |
| /* Write buffer contains current log version entries. */ |
| LogEntryHeader header = new LogEntryHeader( |
| entryBuffer, LogEntryType.LOG_VERSION, lsn); |
| verifyChecksum(entryBuffer, header, lsn, comment); |
| entryBuffer.position(recStartPos + header.getSize() + |
| header.getItemSize()); |
| } |
| } catch (ChecksumException e) { |
| System.err.println("ChecksumException: (" + comment + ") " + e); |
| System.err.println("start stack trace"); |
| e.printStackTrace(System.err); |
| System.err.println("end stack trace"); |
| } |
| entryBuffer.position(curPos); |
| } |
| |
| private void verifyChecksum(ByteBuffer entryBuffer, |
| LogEntryHeader header, |
| long lsn, |
| String comment) |
| throws ChecksumException { |
| |
| if (!header.hasChecksum()) { |
| return; |
| } |
| ChecksumValidator validator = null; |
| /* Add header to checksum bytes */ |
| validator = new ChecksumValidator(envImpl); |
| int headerSizeMinusChecksum = header.getSizeMinusChecksum(); |
| int itemStart = entryBuffer.position(); |
| entryBuffer.position(itemStart - headerSizeMinusChecksum); |
| validator.update(entryBuffer, headerSizeMinusChecksum); |
| entryBuffer.position(itemStart); |
| |
| /* |
| * Now that we know the size, read the rest of the entry if the first |
| * read didn't get enough. |
| */ |
| int itemSize = header.getItemSize(); |
| if (entryBuffer.remaining() < itemSize) { |
| System.err.println("Couldn't verify checksum (" + comment + ")"); |
| return; |
| } |
| |
| /* |
| * Do entry validation. Run checksum before checking the entry |
| * type, it will be the more encompassing error. |
| */ |
| validator.update(entryBuffer, itemSize); |
| validator.validate(header.getChecksum(), lsn); |
| } |
| |
| /** |
| * FSync the end of the log. |
| */ |
| void syncLogEnd() |
| throws DatabaseException { |
| |
| try { |
| endOfLog.force(); |
| } catch (IOException e) { |
| throw new LogWriteException |
| (envImpl, "IOException during fsync", e); |
| } |
| } |
| |
| /** |
| * Sync the end of the log, close off this log file. Should only be called |
| * under the log write latch. |
| */ |
| void syncLogEndAndFinishFile() |
| throws DatabaseException, IOException { |
| |
| if (syncAtFileEnd) { |
| syncLogEnd(); |
| } |
| endOfLog.close(); |
| } |
| |
| /** |
| * Returns whether anything is in the write queue. |
| */ |
| public boolean hasQueuedWrites() { |
| return endOfLog.hasQueuedWrites(); |
| } |
| |
| /** |
| * For unit testing only. |
| */ |
| public void testWriteQueueLock() { |
| endOfLog.fsyncFileSynchronizer.lock(); |
| } |
| |
| /** |
| * For unit testing only. |
| */ |
| public void testWriteQueueUnlock() { |
| endOfLog.fsyncFileSynchronizer.unlock(); |
| } |
| |
| public void startFileCacheWarmer(final long recoveryStartLsn){ |
| assert fileCacheWarmer == null; |
| |
| final DbConfigManager cm = envImpl.getConfigManager(); |
| |
| final int warmUpSize = cm.getInt( |
| EnvironmentParams.LOG_FILE_WARM_UP_SIZE); |
| |
| if (warmUpSize == 0) { |
| return; |
| } |
| |
| final int bufSize = cm.getInt( |
| EnvironmentParams.LOG_FILE_WARM_UP_BUF_SIZE); |
| |
| fileCacheWarmer = new FileCacheWarmer( |
| envImpl, recoveryStartLsn, lastUsedLsn, warmUpSize, bufSize); |
| |
| fileCacheWarmer.start(); |
| } |
| |
| private void stopFileCacheWarmer(){ |
| |
| /* |
| * Use fcw local var because fileCacheWarmer can be set to null by |
| * other threads calling clearFileCacheWarmer, namely the cache warmer |
| * thread. |
| */ |
| final FileCacheWarmer fcw = fileCacheWarmer; |
| |
| if (fcw == null) { |
| return; |
| } |
| |
| fcw.shutdown(); |
| |
| clearFileCacheWarmer(); |
| } |
| |
    /* Allow cache warmer thread to be GC'd. Called by stopFileCacheWarmer
     * and, per the comment there, by the cache warmer thread itself. */
    void clearFileCacheWarmer() {
        fileCacheWarmer = null;
    }
| |
| /** |
| * Close all file handles and empty the cache. |
| */ |
| public void clear() |
| throws IOException, DatabaseException { |
| |
| synchronized (fileCache) { |
| fileCache.clear(); |
| } |
| |
| endOfLog.close(); |
| } |
| |
| /** |
| * Clear the file lock. |
| */ |
| public void close() |
| throws IOException { |
| |
| stopFileCacheWarmer(); |
| |
| if (envLock != null) { |
| envLock.release(); |
| envLock = null; |
| } |
| |
| if (exclLock != null) { |
| exclLock.release(); |
| exclLock = null; |
| } |
| |
| if (channel != null) { |
| channel.close(); |
| channel = null; |
| } |
| |
| if (lockFile != null) { |
| lockFile.close(); |
| lockFile = null; |
| } |
| |
| if (fdd != null) { |
| fdd.close(); |
| } |
| } |
| |
| /** |
| * Lock the environment. Return true if the lock was acquired. If |
| * exclusive is false, then this implements a single writer, multiple |
| * reader lock. If exclusive is true, then implement an exclusive lock. |
| * |
| * There is a lock file and there are two regions of the lock file: byte 0, |
| * and byte 1. Byte 0 is the exclusive writer process area of the lock |
| * file. If an environment is opened for write, then it attempts to take |
| * an exclusive write lock on byte 0. Byte 1 is the shared reader process |
| * area of the lock file. If an environment is opened for read-only, then |
| * it attempts to take a shared lock on byte 1. This is how we implement |
| * single writer, multi reader semantics. |
| * |
| * The cleaner, each time it is invoked, attempts to take an exclusive lock |
| * on byte 1. The owning process already either has an exclusive lock on |
| * byte 0, or a shared lock on byte 1. This will necessarily conflict with |
| * any shared locks on byte 1, even if it's in the same process and there |
| * are no other holders of that shared lock. So if there is only one |
| * read-only process, it will have byte 1 for shared access, and the |
| * cleaner can not run in it because it will attempt to get an exclusive |
| * lock on byte 1 (which is already locked for shared access by itself). |
| * If a write process comes along and tries to run the cleaner, it will |
| * attempt to get an exclusive lock on byte 1. If there are no other |
| * reader processes (with shared locks on byte 1), and no other writers |
| * (which are running cleaners on with exclusive locks on byte 1), then the |
| * cleaner will run. |
| */ |
| public boolean lockEnvironment(boolean rdOnly, boolean exclusive) { |
| try { |
| if (checkEnvHomePermissions(rdOnly)) { |
| return true; |
| } |
| |
| if (lockFile == null) { |
| lockFile = |
| new RandomAccessFile |
| (new File(dbEnvHome, LOCK_FILE), |
| FileMode.READWRITE_MODE.getModeValue()); |
| } |
| |
| channel = lockFile.getChannel(); |
| |
| try { |
| if (exclusive) { |
| |
| /* |
| * To lock exclusive, must have exclusive on |
| * shared reader area (byte 1). |
| */ |
| exclLock = channel.tryLock(1, 1, false); |
| if (exclLock == null) { |
| return false; |
| } |
| return true; |
| } |
| if (rdOnly) { |
| envLock = channel.tryLock(1, 1, true); |
| } else { |
| envLock = channel.tryLock(0, 1, false); |
| } |
| if (envLock == null) { |
| return false; |
| } |
| return true; |
| } catch (OverlappingFileLockException e) { |
| return false; |
| } |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); |
| } |
| } |
| |
| public void releaseExclusiveLock() |
| throws DatabaseException { |
| |
| try { |
| if (exclLock != null) { |
| exclLock.release(); |
| } |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); |
| } |
| } |
| |
| /** |
| * Ensure that if the environment home dir is on readonly media or in a |
| * readonly directory that the environment has been opened for readonly |
| * access. |
| * |
| * @return true if the environment home dir is readonly. |
| * |
| * @throws IllegalArgumentException via Environment ctor |
| */ |
| public boolean checkEnvHomePermissions(boolean rdOnly) |
| throws DatabaseException { |
| |
| if (nDataDirs == 0) { |
| return checkEnvHomePermissionsSingleEnvDir(dbEnvHome, rdOnly); |
| } else { |
| return checkEnvHomePermissionsMultiEnvDir(rdOnly); |
| } |
| } |
| |
| private boolean checkEnvHomePermissionsSingleEnvDir(File dbEnvHome, |
| boolean rdOnly) |
| throws DatabaseException { |
| |
| boolean envDirIsReadOnly = !dbEnvHome.canWrite(); |
| if (envDirIsReadOnly && !rdOnly) { |
| |
| /* |
| * Use the absolute path in the exception message, to |
| * make a mis-specified relative path problem more obvious. |
| */ |
| throw new IllegalArgumentException |
| ("The Environment directory " + |
| dbEnvHome.getAbsolutePath() + |
| " is not writable, but the " + |
| "Environment was opened for read-write access."); |
| } |
| |
| return envDirIsReadOnly; |
| } |
| |
| private boolean checkEnvHomePermissionsMultiEnvDir(boolean rdOnly) |
| throws DatabaseException { |
| |
| for (File dbEnvDir : dbEnvDataDirs) { |
| if (!checkEnvHomePermissionsSingleEnvDir(dbEnvDir, rdOnly)) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /** |
| * Truncate a log at this position. Used by recovery to a timestamp |
| * utilities and by recovery to set the end-of-log position, see |
| * LastFileReader.setEndOfFile(). |
| * |
| * <p>This method forces a new log file to be written next, if the last |
| * file (the file truncated to) has an old version in its header. This |
| * ensures that when the log is opened by an old version of JE, a version |
| * incompatibility will be detected. [#11243]</p> |
| */ |
| public void truncateSingleFile(long fileNum, long offset) |
| throws IOException, DatabaseException { |
| |
| try { |
| FileHandle handle = |
| makeFileHandle(fileNum, getAppropriateReadWriteMode()); |
| RandomAccessFile file = handle.getFile(); |
| |
| try { |
| file.getChannel().truncate(offset); |
| } finally { |
| file.close(); |
| } |
| |
| if (handle.isOldHeaderVersion()) { |
| forceNewFile = true; |
| } |
| } catch (ChecksumException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, e); |
| } |
| } |
| |
| /* |
| * Truncate all log entries after a specified log entry, the position of |
| * that entry is specified by the fileNum and offset, we do this to avoid |
| * the log file gap. Used by replication hard recovery and the |
| * DbTruncateLog utility, see SR [#19463]. |
| */ |
| public void truncateLog(long fileNum, long offset) |
| throws IOException, DatabaseException { |
| |
| /* |
| * Truncate the log files following by this log file in descending |
| * order to avoid the log entry gap, see SR [#19463]. |
| */ |
| for (long i = getLastFileNum(); i >= fileNum; i--) { |
| /* Do nothing if this file doesn't exist. */ |
| if (!isFileValid(i)) { |
| continue; |
| } |
| |
| /* |
| * If this is the file that truncation starts, invoke |
| * truncateSingleFile. If the offset is 0, which means the |
| * FileHeader is also deleted, delete the whole file to avoid a log |
| * file gap. |
| */ |
| if (i == fileNum) { |
| truncateSingleFile(fileNum, offset); |
| if (offset != 0) { |
| continue; |
| } |
| } |
| |
| boolean deleted = deleteFile(i); |
| assert deleted : "File " + getFullFileName(i, JE_SUFFIX) + |
| " not deleted during truncateLog"; |
| } |
| } |
| |
| /** |
| * Mark the specified log entries as invisible and obsolete. The entries |
| * are written here, but are fsync'ed later. If there is any problem or |
| * exception during the setting, the method will throw an |
| * EnvironmentFailureException. |
| * |
| * These changes are made directly to the file, but recently logged log |
| * entries may also be resident in the log buffers. The caller must take |
| * care to call LogManager.flush() before this method, to ensure that all |
| * entries are on disk. |
| * |
| * In addition, we must ensure that after this step, the affected log |
| * entries will only be read via a FileReader, and will not be faulted in |
| * by the LogManager. Entries may be present in the log and in the log |
| * buffers, but only the on disk version is modified by this method. The |
| * LogManager can read directly from the log buffers and may read the |
| * incorrect, non-invisible version of the log entry, rather than the |
| * invisible version from the file. This should not be an issue, because |
| * invisible log entries should be detached from the in-memory tree before |
| * they are made invisible. |
| * |
| * @param fileNum target file. |
| * @param lsns The list of LSNs to make invisible, must be sorted in |
| * ascending order. |
| */ |
| public void makeInvisible(long fileNum, List<Long> lsns) { |
| if (lsns.size() == 0) { |
| return; |
| } |
| |
| /* Open this file. */ |
| FileHandle handle = null; |
| try { |
| |
| /* |
| * Note that we are getting a new, non-cached file handle for |
| * specific use by this method. |
| */ |
| handle = makeFileHandle(fileNum, getAppropriateReadWriteMode()); |
| } catch (ChecksumException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, |
| "Opening file " + fileNum + " for invisible marking ", e); |
| } catch (FileNotFoundException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, |
| "Opening file " + fileNum + " for invisible marking ", e); |
| } |
| RandomAccessFile file = handle.getFile(); |
| |
| /* Set the invisible bit for each entry. */ |
| try { |
| for (Long lsn : lsns) { |
| if (DbLsn.getFileNumber(lsn) != fileNum) { |
| |
| /* |
| * This failure will not invalidate the environment right |
| * away. But since it causes replication syncup to fail, |
| * the environment will shutdown, which is the effect we |
| * want. |
| */ |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.UNEXPECTED_STATE, |
| "LSN of " + DbLsn.getNoFormatString(lsn) + |
| " did not match file number" + fileNum); |
| } |
| |
| int entryFlagsOffset = (int) |
| (DbLsn.getFileOffset(lsn) + LogEntryHeader.FLAGS_OFFSET); |
| file.seek(entryFlagsOffset); |
| byte flags = file.readByte(); |
| byte newFlags = LogEntryHeader.makeInvisible(flags); |
| file.seek(entryFlagsOffset); |
| file.writeByte(newFlags); |
| } |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_WRITE, |
| "Flipping invisibility in file " + fileNum, e); |
| } finally { |
| |
| /* |
| * Just close the file. Fsyncs will be done later on, in the hope |
| * that the OS has already synced asynchronously. |
| */ |
| try { |
| file.close(); |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_WRITE, |
| "Closing after invisibility cloaking: file " + fileNum, e); |
| } |
| } |
| } |
| |
| /** |
| * Fsync this set of log files. Used for replication syncup rollback. |
| */ |
| public void force(Set<Long> fileNums) { |
| for (long fileNum : fileNums) { |
| RandomAccessFile file = null; |
| try { |
| FileHandle handle = |
| makeFileHandle(fileNum, getAppropriateReadWriteMode()); |
| file = handle.getFile(); |
| long start = System.currentTimeMillis(); |
| file.getChannel().force(false); |
| noteFsyncTime(System.currentTimeMillis() - start); |
| } catch (FileNotFoundException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, |
| "Invisible fsyncing file " + fileNum, e); |
| } catch (ChecksumException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_CHECKSUM, |
| "Invisible fsyncing file " + fileNum, e); |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_WRITE, |
| "Invisible fsyncing file " + fileNum, e); |
| } finally { |
| if (file != null) { |
| try { |
| file.close(); |
| } catch (IOException e) { |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_WRITE, |
| "Invisible fsyncing file " + fileNum, e); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Set the flag that causes a new file to be written before the next write. |
| */ |
| public void forceNewLogFile() { |
| forceNewFile = true; |
| } |
| |
| /** |
| * Return the offset of the first log entry after the file header. |
| * |
| * @return the size in bytes of the file header log entry. |
| */ |
| public static int firstLogEntryOffset() { |
| return FileHeader.entrySize() + LogEntryHeader.MIN_HEADER_SIZE; |
| } |
| |
| /** |
| * Return the next available LSN in the log. Note that this is |
| * unsynchronized, so if it is called outside the log write latch it is |
| * only valid as an approximation. |
| */ |
| public long getNextLsn() { |
| return nextAvailableLsn; |
| } |
| |
| /** |
| * Return the last allocated LSN in the log. Note that this is |
| * unsynchronized, so if it is called outside the log write latch it is |
| * only valid as an approximation. |
| */ |
| public long getLastUsedLsn() { |
| return lastUsedLsn; |
| } |
| |
| StatGroup loadStats(StatsConfig config) { |
| nOpenFiles.set(fileCache.size()); |
| StatGroup copyStats = stats.cloneGroup(config.getClear()); |
| |
| return copyStats; |
| } |
| |
| /* |
| * Unit test support |
| */ |
| |
| /* |
| * @return ids of files in cache |
| */ |
| Set<Long> getCacheKeys() { |
| return fileCache.getCacheKeys(); |
| } |
| |
| /** |
| * Clear a file out of the file cache regardless of mode type. |
| */ |
| private void clearFileCache(long fileNum) |
| throws IOException, DatabaseException { |
| |
| synchronized (fileCache) { |
| fileCache.remove(fileNum); |
| } |
| } |
| |
| /* |
| * The file cache keeps N RandomAccessFile objects cached for file |
| * access. The cache consists of two parts: a Hashtable that doesn't |
| * require extra synchronization, for the most common access, and a linked |
| * list of files to support cache administration. Looking up a file from |
| * the hash table doesn't require extra latching, but adding or deleting a |
| * file does. |
| */ |
| private static class FileCache { |
| private final Map<Long, FileHandle> fileMap; // Long->file |
| private final List<Long> fileList; // list of file numbers |
| private final int fileCacheSize; |
| |
| FileCache(DbConfigManager configManager) { |
| |
| /* |
| * A fileMap maps the file number to FileHandles (RandomAccessFile, |
| * latch). The fileList is a list of Longs to determine which files |
| * to eject out of the file cache if it's too small. |
| */ |
| fileMap = new Hashtable<Long, FileHandle>(); |
| fileList = new LinkedList<Long>(); |
| fileCacheSize = |
| configManager.getInt(EnvironmentParams.LOG_FILE_CACHE_SIZE); |
| } |
| |
| private FileHandle get(Long fileId) { |
| return fileMap.get(fileId); |
| } |
| |
| private void add(Long fileId, FileHandle fileHandle) |
| throws IOException, DatabaseException { |
| |
| /* |
| * Does the cache have any room or do we have to evict? Hunt down |
| * the file list for an unused file. Note that the file cache might |
| * actually grow past the prescribed size if there is nothing |
| * evictable. Should we try to shrink the file cache? Presently if |
| * it grows, it doesn't shrink. |
| */ |
| if (fileList.size() >= fileCacheSize) { |
| Iterator<Long> iter = fileList.iterator(); |
| while (iter.hasNext()) { |
| Long evictId = iter.next(); |
| FileHandle evictTarget = fileMap.get(evictId); |
| |
| /* |
| * Try to latch. If latchNoWait returns false, then another |
| * thread owns this latch. Note that a thread that's trying |
| * to get a new file handle should never already own the |
| * latch on another file handle, because these latches are |
| * meant to be short lived and only held over the i/o out |
| * of the file. |
| */ |
| if (evictTarget.latchNoWait()) { |
| try { |
| fileMap.remove(evictId); |
| iter.remove(); |
| evictTarget.close(); |
| } finally { |
| evictTarget.release(); |
| } |
| break; |
| } |
| } |
| } |
| |
| /* |
| * We've done our best to evict. Add the file the the cache now |
| * whether or not we did evict. |
| */ |
| fileList.add(fileId); |
| fileMap.put(fileId, fileHandle); |
| } |
| |
| /** |
| * Take any file handles corresponding to this file name out of the |
| * cache. A file handle could be there twice, in rd only and in r/w |
| * mode. |
| */ |
| private void remove(long fileNum) |
| throws IOException, DatabaseException { |
| |
| Iterator<Long> iter = fileList.iterator(); |
| while (iter.hasNext()) { |
| Long evictId = iter.next(); |
| if (evictId.longValue() == fileNum) { |
| FileHandle evictTarget = fileMap.get(evictId); |
| try { |
| evictTarget.latch(); |
| fileMap.remove(evictId); |
| iter.remove(); |
| evictTarget.close(); |
| } finally { |
| evictTarget.release(); |
| } |
| } |
| } |
| } |
| |
| private void clear() |
| throws IOException, DatabaseException { |
| |
| Iterator<FileHandle> iter = fileMap.values().iterator(); |
| while (iter.hasNext()) { |
| FileHandle fileHandle = iter.next(); |
| try { |
| fileHandle.latch(); |
| fileHandle.close(); |
| iter.remove(); |
| } finally { |
| fileHandle.release(); |
| } |
| } |
| fileMap.clear(); |
| fileList.clear(); |
| } |
| |
| private Set<Long> getCacheKeys() { |
| return fileMap.keySet(); |
| } |
| |
| private int size() { |
| return fileMap.size(); |
| } |
| } |
| |
| /** |
| * The LogEndFileDescriptor is used to write and fsync the end of the log. |
| * Because the JE log is append only, there is only one logical R/W file |
| * descriptor for the whole environment. This class actually implements two |
| * RandomAccessFile instances, one for writing and one for fsyncing, so the |
| * two types of operations don't block each other. |
| * |
| * The write file descriptor is considered the master. Manipulation of |
| * this class is done under the log write latch. Here's an explanation of |
| * why the log write latch is sufficient to safeguard all operations. |
| * |
| * There are two types of callers who may use this file descriptor: the |
| * thread that is currently writing to the end of the log and any threads |
| * that are fsyncing on behalf of the FSyncManager. |
| * |
| * The writing thread appends data to the file and fsyncs the file when we |
| * flip over to a new log file. The file is only instantiated at the point |
| * that it must do so -- which is either when the first fsync is required |
| * by JE or when the log file is full and we flip files. Therefore, the |
| * writing thread has two actions that change this descriptor -- we |
| * initialize the file descriptor for the given log file at the first write |
| * to the file, and we close the file descriptor when the log file is full. |
| * Therefore is a period when there is no log descriptor -- when we have |
| * not yet written a log buffer into a given log file. |
| * |
| * The fsyncing threads ask for the log end file descriptor asynchronously, |
| * but will never modify it. These threads may arrive at the point when |
| * the file descriptor is null, and therefore skip their fysnc, but that is |
| * fine because it means a writing thread already flipped that target file |
| * and has moved on to the next file. |
| * |
| * Time Activity |
| * 10 thread 1 writes log entry A into file 0x0, issues fsync |
| * outside of log write latch, yields the processor |
| * 20 thread 2 writes log entry B, piggybacks off thread 1 |
| * 30 thread 3 writes log entry C, but no room left in that file, |
| * so it flips the log, and fsyncs file 0x0, all under the log |
| * write latch. It nulls out endOfLogRWFile, moves onto file |
| * 0x1, but doesn't create the file yet. |
| * 40 thread 1 finally comes along, but endOfLogRWFile is null-- |
| * no need to fsync in that case, 0x0 got fsynced. |
| * |
| * If a write is attempted and an fsync is already in progress, then the |
| * information pertaining to the data to be written (data, offset, length) |
| * is saved away in the "queuedWrites" array. When the fsync completes, |
| * the queuedWrites buffer is emptied. This ensures that writes continue |
| * to execute on file systems which block all IO calls during an fsync() |
| * call (e.g. ext3). |
| */ |
| class LogEndFileDescriptor { |
        /* R/W descriptor for appending to the end of the log; the master
         * (see the class comment above). */
        private RandomAccessFile endOfLogRWFile = null;

        /* Separate descriptor used for fsync, so syncs and writes don't
         * block each other (see the class comment above). */
        private RandomAccessFile endOfLogSyncFile = null;

        /* Guards descriptor creation/close and the fsync/write-queue
         * interaction. */
        private final ReentrantLock fsyncFileSynchronizer = new ReentrantLock();

        /*
         * Holds all data for writes which have been queued due to their
         * being blocked by an fsync when the original write was attempted.
         * The next thread to execute an fsync or write will execute any
         * queued writes in this buffer.
         * Latch order is fsyncFileSynchronizer, followed by the queuedWrites
         * mutex [ synchronized (queuedWrites) {} ].
         *
         * Default protection for unit tests.
         * NOTE(review): the field is declared private despite the comment
         * above -- confirm which is intended.
         */
        private final byte[] queuedWrites =
            useWriteQueue ? new byte[writeQueueSize] : null;

        /* Current position in the queuedWrites array. */
        private int queuedWritesPosition = 0;

        /* The starting offset on disk of the first byte in queuedWrites. */
        private long qwStartingOffset;

        /* The file number that the queuedWrites are destined for. */
        private long qwFileNum = -1;

        /* For unit tests. */
        void setQueueFileNum(final long qwFileNum) {
            this.qwFileNum = qwFileNum;
        }
| |
| /* |
| * Check if fileNo/offset is present in queuedWrites, and if so, fill |
| * readBuffer with those bytes. We theorize that this is needed |
| * because HA will be reading at the very end of the log and those |
| * writes, if enqueued, may no longer be in LogBuffers in the |
| * LogBufferPool. This might happen in the case of lots of concurrent |
| * non-synchronous writes (with synchronous commits) which become |
| * enqueued in the queuedWrites cache, but cycle out of the LBP. In |
| * general, using synchronous commits with HA is a bad idea. |
| * |
| * Default protection for unit tests. |
| * @return true if more data was available. If so, the read buffer |
| * will be filled up. |
| */ |
| /* private */ |
| boolean checkWriteCache(final ByteBuffer readBuffer, |
| final long requestedOffset, |
| final long fileNum) { |
| |
| int pos = readBuffer.position(); |
| int targetBufSize = readBuffer.limit() - pos; |
| synchronized (queuedWrites) { |
| if (qwFileNum != fileNum) { |
| return false; |
| } |
| |
| if (queuedWritesPosition == 0) { |
| return false; |
| } |
| |
| if (requestedOffset < qwStartingOffset || |
| (qwStartingOffset + queuedWritesPosition) <= |
| requestedOffset) { |
| return false; |
| } |
| |
| /* We have the bytes available. */ |
| int nBytesToCopy = (int) |
| (queuedWritesPosition - |
| (requestedOffset - qwStartingOffset)); |
| nBytesToCopy = Math.min(nBytesToCopy, targetBufSize); |
| readBuffer.put(queuedWrites, |
| (int) (requestedOffset - qwStartingOffset), |
| nBytesToCopy); |
| nBytesReadFromWriteQueue.add(nBytesToCopy); |
| nReadsFromWriteQueue.increment(); |
| return true; |
| } |
| } |
| |
| /* |
| * Enqueue a blocked write call for later execution by the next thread |
| * to do either an fsync or write call. fsyncFileSynchronizer is not |
| * held when this is called. |
| * |
| * Default protection for unit tests. |
| */ |
| /* private */ |
| boolean enqueueWrite(final long fileNum, |
| final byte[] data, |
| final long destOffset, |
| final int arrayOffset, |
| final int size) |
| throws DatabaseException { |
| |
| assert !fsyncFileSynchronizer.isHeldByCurrentThread(); |
| |
| for (int i = 0; i < 2; i++) { |
| try { |
| enqueueWrite1(fileNum, data, destOffset, |
| arrayOffset, size); |
| return true; |
| } catch (RelatchRequiredException RE) { |
| dequeuePendingWrites(); |
| } |
| } |
| |
| /* Give up after two tries. */ |
| nWriteQueueOverflowFailures.increment(); |
| return false; |
| } |
| |
        /*
         * Appends one write to the in-memory write queue. Throws
         * RelatchRequiredException when the queue would overflow, so the
         * caller can drain it under the proper latch order and retry.
         */
        private void enqueueWrite1(final long fileNum,
                                   final byte[] data,
                                   final long destOffset,
                                   final int arrayOffset,
                                   final int size)
            throws RelatchRequiredException, DatabaseException {

            /*
             * The queuedWrites queue only ever holds writes for a single file.
             *
             * This check is safe because qwFileNum can only ever change inside
             * enqueueWrite which can only ever be called while the Log Write
             * Latch is held.
             *
             * NOTE: We believe the commented out second condition is safe
             * to add to the code if we ever see contention with this call to
             * dequeuePendingWrites against an fsync. Here is the reasoning:
             *
             * queuedWritesPosition is changed in two places: (1) enqueueWrite1
             * where it is incremented, and (2) dequeuePendingWrites1 where it
             * is zeroed. Both of these places are protected by the queuedWrites
             * mutex. The zero'ing (2) will only make the dequeue unnecessary
             * so the extra commented out check below is safe since it will
             * only result in eliminating an unnecessary dequeuePendingWrites
             * call.
             */
            if (qwFileNum < fileNum /* && queuedWritesPosition > 0 */) {
                dequeuePendingWrites();
                qwFileNum = fileNum;
            }

            synchronized (queuedWrites) {
                boolean overflow =
                    (writeQueueSize - queuedWritesPosition) < size;
                if (overflow) {
                    nWriteQueueOverflow.increment();

                    /*
                     * Since we can't write this "write call" into the
                     * ByteBuffer without overflowing, we will try to dequeue
                     * all current writes in the buffer. But that requires
                     * holding the fsyncFileSynchronizer latch first which
                     * would be latching out of order relative to the
                     * queuedWrites mutex.
                     */
                    throw RelatchRequiredException.relatchRequiredException;
                }

                assert qwFileNum == fileNum;
                int curPos = queuedWritesPosition;
                if (curPos == 0) {

                    /*
                     * This is the first entry in queue. Set qwStartingOffset.
                     */
                    qwStartingOffset = destOffset;
                }

                /* Queued writes must be contiguous on disk. */
                if (curPos + qwStartingOffset != destOffset) {
                    throw new EnvironmentFailureException
                        (envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
                         "non-consecutive writes queued. " +
                         "qwPos=" + queuedWritesPosition +
                         " write destOffset=" + destOffset);
                }

                System.arraycopy(data, arrayOffset,
                                 queuedWrites, queuedWritesPosition,
                                 size);
                queuedWritesPosition += size;
            }
        }
| |
| /** |
| * Returns whether anything is in the write queue. |
| */ |
| boolean hasQueuedWrites() { |
| return queuedWritesPosition > 0; |
| } |
| |
| /* |
| * Execute pending writes. Assumes fsyncFileSynchronizer is not held. |
| */ |
| private void dequeuePendingWrites() |
| throws DatabaseException { |
| |
| assert !fsyncFileSynchronizer.isHeldByCurrentThread(); |
| |
| fsyncFileSynchronizer.lock(); |
| try { |
| dequeuePendingWrites1(); |
| } finally { |
| fsyncFileSynchronizer.unlock(); |
| } |
| } |
| |
| /* |
| * Execute pending writes. Assumes fsyncFileSynchronizer is held. |
| */ |
| private void dequeuePendingWrites1() |
| throws DatabaseException { |
| |
| assert fsyncFileSynchronizer.isHeldByCurrentThread(); |
| |
| try { |
| synchronized (queuedWrites) { |
| /* Nothing to see here. Move along. */ |
| if (queuedWritesPosition == 0) { |
| return; |
| } |
| |
| RandomAccessFile file = getWritableFile(qwFileNum, false); |
| synchronized (file) { |
| file.seek(qwStartingOffset); |
| file.write(queuedWrites, 0, queuedWritesPosition); |
| nBytesWrittenFromWriteQueue.add(queuedWritesPosition); |
| nWritesFromWriteQueue.increment(); |
| if (VERIFY_CHECKSUMS) { |
| file.seek(qwStartingOffset); |
| file.read(queuedWrites, 0, queuedWritesPosition); |
| ByteBuffer bb = |
| ByteBuffer.allocate(queuedWritesPosition); |
| bb.put(queuedWrites, 0, queuedWritesPosition); |
| bb.position(0); |
| verifyChecksums |
| (bb, qwStartingOffset, "post-write"); |
| } |
| } |
| |
| /* We flushed the queue. Reset the buffer. */ |
| queuedWritesPosition = 0; |
| } |
| } catch (IOException e) { |
| throw new LogWriteException |
| (envImpl, "IOException during fsync", e); |
| } |
| } |
| |
| /** |
| * getWritableFile must be called under the log write latch. |
| * |
| * Typically, endOfLogRWFile is not null. Hence the |
| * fsyncFileSynchronizer does not need to be locked (which would |
| * block the write queue from operating. |
| */ |
| private RandomAccessFile getWritableFile(final long fileNumber, |
| final boolean doLock) { |
| try { |
| if (endOfLogRWFile == null) { |
| |
| /* |
| * We need to make a file descriptor for the end of the |
| * log. This is guaranteed to be called under the log |
| * write latch. |
| * |
| * Protect both the RWFile and SyncFile under this lock, |
| * to avoid a race for creating the file and writing the |
| * header. [#20732] |
| */ |
| if (doLock) { |
| fsyncFileSynchronizer.lock(); |
| } |
| try { |
| endOfLogRWFile = |
| makeFileHandle(fileNumber, |
| getAppropriateReadWriteMode()). |
| getFile(); |
| endOfLogSyncFile = |
| makeFileHandle(fileNumber, |
| getAppropriateReadWriteMode()). |
| getFile(); |
| } finally { |
| if (doLock) { |
| fsyncFileSynchronizer.unlock(); |
| } |
| } |
| } |
| |
| return endOfLogRWFile; |
| } catch (Exception e) { |
| |
| /* |
| * If we can't get a write channel, we need to invalidate the |
| * environment. |
| */ |
| throw new EnvironmentFailureException |
| (envImpl, EnvironmentFailureReason.LOG_INTEGRITY, e); |
| } |
| } |
| |
| /** |
| * FSync the log file that makes up the end of the log. |
| */ |
| private void force() |
| throws DatabaseException, IOException { |
| |
| /* |
| * Get a local copy of the end of the log file descriptor, it could |
| * change. No need to latch, no harm done if we get an old file |
| * descriptor, because we forcibly fsync under the log write latch |
| * when we switch files. |
| * |
| * If there is no current end file descriptor, we know that the log |
| * file has flipped to a new file since the fsync was issued. |
| */ |
| fsyncFileSynchronizer.lock(); |
| try { |
| |
| /* Flush any queued writes. */ |
| if (useWriteQueue) { |
| dequeuePendingWrites1(); |
| } |
| |
| RandomAccessFile file = endOfLogSyncFile; |
| if (file != null) { |
| bumpWriteCount("fsync"); |
| FileChannel ch = file.getChannel(); |
| |
| long start = System.currentTimeMillis(); |
| try { |
| ch.force(false); |
| } catch (ClosedChannelException e) { |
| |
| /* |
| * The channel should never be closed. It may be closed |
| * because of an interrupt received by another thread. |
| * See SR [#10463]. |
| */ |
| throw new ThreadInterruptedException |
| (envImpl, |
| "Channel closed, may be due to thread interrupt", |
| e); |
| } |
| final long fSyncMs = System.currentTimeMillis() - start; |
| final boolean newMax = noteFsyncTime(fSyncMs); |
| if (newMax && |
| fSyncTimeLimit != 0 && |
| fSyncMs > fSyncTimeLimit) { |
| |
| LoggerUtils.warning( |
| envImpl.getLogger(), envImpl, |
| String.format( |
| "FSync time of %d ms exceeds limit (%d ms)", |
| fSyncMs, fSyncTimeLimit)); |
| } |
| |
| assert EnvironmentImpl.maybeForceYield(); |
| } |
| |
| /* Flush any writes which were queued while fsync'ing. */ |
| if (useWriteQueue) { |
| dequeuePendingWrites1(); |
| } |
| } finally { |
| fsyncFileSynchronizer.unlock(); |
| } |
| } |
| |
| /** |
| * Close the end of the log file descriptor. Use atomic assignment to |
| * ensure that we won't force and close on the same descriptor. |
| */ |
| void close() |
| throws IOException { |
| |
| /* |
| * Protect both the RWFile and SyncFile under this lock out of |
| * paranoia, although we don't expect two threads to call close |
| * concurrently. [#20732] |
| */ |
| fsyncFileSynchronizer.lock(); |
| try { |
| IOException firstException = null; |
| if (endOfLogRWFile != null) { |
| RandomAccessFile file = endOfLogRWFile; |
| |
| /* |
| * Null out so that other threads know endOfLogRWFile is no |
| * longer available. |
| */ |
| endOfLogRWFile = null; |
| try { |
| file.close(); |
| } catch (IOException e) { |
| /* Save this exception, so we can try second close. */ |
| firstException = e; |
| } |
| } |
| if (endOfLogSyncFile != null) { |
| RandomAccessFile file = endOfLogSyncFile; |
| |
| /* |
| * Null out so that other threads know endOfLogSyncFile is |
| * no longer available. |
| */ |
| endOfLogSyncFile = null; |
| file.close(); |
| } |
| |
| if (firstException != null) { |
| throw firstException; |
| } |
| } finally { |
| fsyncFileSynchronizer.unlock(); |
| } |
| } |
| } |
| |
| /* |
| * Generate IOExceptions for testing. |
| */ |
| |
| /* Testing switch. public so others can read the value. */ |
| public static final boolean LOGWRITE_EXCEPTION_TESTING; |
| private static String RRET_PROPERTY_NAME = "je.logwrite.exception.testing"; |
| |
| static { |
| LOGWRITE_EXCEPTION_TESTING = |
| (System.getProperty(RRET_PROPERTY_NAME) != null); |
| } |
| |
| /* Max write counter value. */ |
| private static final int LOGWRITE_EXCEPTION_MAX = 100; |
| /* Current write counter value. */ |
| private int logWriteExceptionCounter = 0; |
| /* Whether an exception has been thrown. */ |
| private boolean logWriteExceptionThrown = false; |
| /* Random number generator. */ |
| private Random logWriteExceptionRandom = null; |
| |
| private void generateLogWriteException(RandomAccessFile file, |
| ByteBuffer data, |
| long destOffset, |
| long fileNum) |
| throws DatabaseException, IOException { |
| |
| if (logWriteExceptionThrown) { |
| (new Exception("Write after LogWriteException")). |
| printStackTrace(); |
| } |
| logWriteExceptionCounter += 1; |
| if (logWriteExceptionCounter >= LOGWRITE_EXCEPTION_MAX) { |
| logWriteExceptionCounter = 0; |
| } |
| if (logWriteExceptionRandom == null) { |
| logWriteExceptionRandom = new Random(System.currentTimeMillis()); |
| } |
| if (logWriteExceptionCounter == |
| logWriteExceptionRandom.nextInt(LOGWRITE_EXCEPTION_MAX)) { |
| int len = logWriteExceptionRandom.nextInt(data.remaining()); |
| if (len > 0) { |
| byte[] a = new byte[len]; |
| data.get(a, 0, len); |
| ByteBuffer buf = ByteBuffer.wrap(a); |
| writeToFile(file, buf, destOffset, fileNum, |
| false /*flushRequired*/); |
| } |
| logWriteExceptionThrown = true; |
| throw new IOException("Randomly generated for testing"); |
| } |
| } |
| |
| /** |
| * The factory interface for creating RandomAccessFiles. For production |
| * use, the default factory is always used and a DefaultRandomAccessFile is |
| * always created. For testing, the factory can be overridden to return a |
| * subclass of DefaultRandomAccessFile that overrides methods and injects |
| * faults, for example. |
| */ |
| public interface FileFactory { |
| |
| /** |
| * @param envHome can be used to distinguish environments in a test |
| * program that opens multiple environments. Not for production use. |
| * |
| * @param fullName the full file name to be passed to the |
| * RandomAccessFile constructor. |
| * |
| * @param mode the file mode to be passed to the RandomAccessFile |
| * constructor. |
| */ |
| RandomAccessFile createFile(File envHome, String fullName, String mode) |
| throws FileNotFoundException; |
| } |
| |
| /** |
| * The RandomAccessFile for production use. Tests that override the |
| * default FileFactory should return a RandomAccessFile that subclasses |
| * this class to inherit workarounds such as the overridden length method. |
| */ |
| public static class DefaultRandomAccessFile extends RandomAccessFile { |
| |
| public DefaultRandomAccessFile(String fullName, String mode) |
| throws FileNotFoundException { |
| |
| super(fullName, mode); |
| } |
| |
| /** |
| * RandomAccessFile.length() is not thread safe and side-effects the |
| * file pointer if interrupted in the middle. It is synchronized here |
| * to work around that problem. |
| */ |
| @Override |
| public synchronized long length() |
| throws IOException { |
| |
| return super.length(); |
| } |
| } |
| |
| /** |
| * The factory instance used to create RandomAccessFiles. This field is |
| * intentionally public and non-static so it may be set by tests. See |
| * FileFactory. |
| */ |
| public static FileFactory fileFactory = new FileFactory() { |
| |
| public RandomAccessFile createFile(File envHome, |
| String fullName, |
| String mode) |
| throws FileNotFoundException { |
| |
| return new DefaultRandomAccessFile(fullName, mode); |
| } |
| }; |
| |
| /** |
| * Update fsync statistics given that an fsync was performed which took the |
| * specified amount of time in milliseconds. Returns true if this value is |
| * a new maximum for the current statistics time period. |
| */ |
| public boolean noteFsyncTime(long fSyncMs) { |
| nLogFSyncs.increment(); |
| fSyncAvgMs.add(fSyncMs); |
| fSync95Ms.add(fSyncMs); |
| fSync99Ms.add(fSyncMs); |
| return fSyncMaxMs.setMax(fSyncMs); |
| } |
| } |