blob: 303b8e60c319bb525151325f01e9324ac9424b24 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
 * Inspects "transactional" storage directories: discovers the
 * {@code fsimage_N} checkpoint files and edit log segments present in each
 * directory, groups edit logs by their starting transaction ID, and builds a
 * {@link LoadPlan} describing which image and which log segments must be
 * loaded (and which corrupt logs must be moved aside) to reconstruct the
 * most recent namespace state.
 */
class FSImageTransactionalStorageInspector extends FSImageStorageInspector {
  public static final Log LOG = LogFactory.getLog(
    FSImageTransactionalStorageInspector.class);

  // Set when at least one inspected directory must be re-saved (e.g. it was
  // freshly formatted and has no VERSION file yet).
  private boolean needToSave = false;
  // Cleared as soon as any directory still has a "previous" dir, which
  // indicates an upgrade that was never finalized.
  private boolean isUpgradeFinalized = true;

  List<FSImageFile> foundImages = new ArrayList<FSImageFile>();
  List<EditLogFile> foundEditLogs = new ArrayList<EditLogFile>();
  // Edit log segments keyed by starting txid. All logs (from any directory)
  // that start at the same txid are grouped together so they can be
  // cross-checked for consistency and recovered as a unit.
  SortedMap<Long, LogGroup> logGroups = new TreeMap<Long, LogGroup>();
  // Highest txid recorded in any seen_txid file across all directories.
  long maxSeenTxId = 0;

  // Matches "fsimage_<txid>" and captures the txid.
  private static final Pattern IMAGE_REGEX = Pattern.compile(
      NameNodeFile.IMAGE.getName() + "_(\\d+)");

  /**
   * Inspect a single storage directory, recording any images, edit logs,
   * and seen_txid markers it contains.
   *
   * @param sd the storage directory to inspect
   * @throws IOException currently only from nested helpers; an unlistable
   *         directory is logged and skipped rather than failing inspection
   */
  @Override
  public void inspectDirectory(StorageDirectory sd) throws IOException {
    // Was the directory just formatted?
    if (!sd.getVersionFile().exists()) {
      LOG.info("No version file in " + sd.getRoot());
      needToSave = true;
      return;
    }

    File currentDir = sd.getCurrentDir();
    File filesInStorage[];
    try {
      filesInStorage = FileUtil.listFiles(currentDir);
    } catch (IOException ioe) {
      // Best-effort: a directory we cannot list is skipped, not fatal.
      LOG.warn("Unable to inspect storage directory " + currentDir,
          ioe);
      return;
    }

    for (File f : filesInStorage) {
      LOG.debug("Checking file " + f);
      String name = f.getName();

      // Check for fsimage_*
      Matcher imageMatch = IMAGE_REGEX.matcher(name);
      if (imageMatch.matches()) {
        if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
          try {
            long txid = Long.parseLong(imageMatch.group(1));
            foundImages.add(new FSImageFile(sd, f, txid));
          } catch (NumberFormatException nfe) {
            LOG.error("Image file " + f + " has improperly formatted " +
                      "transaction ID");
            // skip
          }
        } else {
          LOG.warn("Found image file at " + f + " but storage directory is " +
                   "not configured to contain images.");
        }
      }
    }

    // Check for a seen_txid file, which marks a minimum transaction ID that
    // must be included in our load plan.
    try {
      maxSeenTxId = Math.max(maxSeenTxId, NNStorage.readTransactionIdFile(sd));
    } catch (IOException ioe) {
      LOG.warn("Unable to determine the max transaction ID seen by " + sd, ioe);
    }

    List<EditLogFile> editLogs
      = FileJournalManager.matchEditLogs(filesInStorage);
    if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
      for (EditLogFile log : editLogs) {
        addEditLog(log);
      }
    } else if (!editLogs.isEmpty()){
      LOG.warn("Found the following edit log file(s) in " + sd +
          " even though it was not configured to store edits:\n" +
          "  " + Joiner.on("\n  ").join(editLogs));
    }

    // set finalized flag
    isUpgradeFinalized = isUpgradeFinalized && !sd.getPreviousDir().exists();
  }

  /**
   * Record an edit log file, placing it into the {@link LogGroup} that
   * corresponds to its starting txid (creating the group if necessary).
   */
  private void addEditLog(EditLogFile foundEditLog) {
    foundEditLogs.add(foundEditLog);
    LogGroup group = logGroups.get(foundEditLog.getFirstTxId());
    if (group == null) {
      group = new LogGroup(foundEditLog.getFirstTxId());
      logGroups.put(foundEditLog.getFirstTxId(), group);
    }
    group.add(foundEditLog);
  }

  @Override
  public boolean isUpgradeFinalized() {
    return isUpgradeFinalized;
  }

  /**
   * @return the image that has the most recent associated transaction ID.
   * If there are multiple storage directories which contain equal images
   * the storage directory that was inspected first will be preferred
   * (the comparison is strictly greater-than, so ties keep the earlier one).
   *
   * Returns null if no images were found.
   */
  FSImageFile getLatestImage() {
    FSImageFile ret = null;
    for (FSImageFile img : foundImages) {
      if (ret == null || img.txId > ret.txId) {
        ret = img;
      }
    }
    return ret;
  }

  /** @return an immutable snapshot of all images found during inspection */
  public List<FSImageFile> getFoundImages() {
    return ImmutableList.copyOf(foundImages);
  }

  /** @return an immutable snapshot of all edit log files found */
  public List<EditLogFile> getEditLogFiles() {
    return ImmutableList.copyOf(foundEditLogs);
  }

  /**
   * Build a load plan from the latest image plus every log segment needed to
   * roll forward from that image's txid.
   *
   * @throws FileNotFoundException if no valid image was found
   * @throws IOException if the logs have a gap or inconsistency
   */
  @Override
  public LoadPlan createLoadPlan() throws IOException {
    if (foundImages.isEmpty()) {
      throw new FileNotFoundException("No valid image files found");
    }

    FSImageFile recoveryImage = getLatestImage();
    LogLoadPlan logPlan = createLogLoadPlan(recoveryImage.txId, Long.MAX_VALUE);

    return new TransactionalLoadPlan(recoveryImage,
        logPlan);
  }

  /**
   * Plan which logs to load in order to bring the namespace up-to-date.
   * Transactions will be considered in the range (sinceTxId, maxTxId]
   *
   * @param sinceTxId the highest txid that is already loaded
   *                  (eg from the image checkpoint)
   * @param maxStartTxId ignore any log files that start after this txid
   *                  (NOTE(review): headMap's upper bound is exclusive, so a
   *                  group starting exactly at maxStartTxId is also excluded
   *                  — confirm this matches callers' expectations)
   * @throws IOException if a gap is detected between log groups, or the
   *                  seen_txid marker exceeds the newest available log
   */
  LogLoadPlan createLogLoadPlan(long sinceTxId, long maxStartTxId) throws IOException {
    long expectedTxId = sinceTxId + 1;

    List<EditLogFile> recoveryLogs = new ArrayList<EditLogFile>();

    // Drop groups whose transactions are already covered by the image.
    SortedMap<Long, LogGroup> tailGroups = logGroups.tailMap(expectedTxId);
    if (logGroups.size() > tailGroups.size()) {
      LOG.debug("Excluded " + (logGroups.size() - tailGroups.size()) +
          " groups of logs because they start with a txid less than image " +
          "txid " + sinceTxId);
    }

    // Drop groups that start beyond the requested upper bound.
    SortedMap<Long, LogGroup> usefulGroups;
    if (maxStartTxId > sinceTxId) {
      usefulGroups = tailGroups.headMap(maxStartTxId);
    } else {
      usefulGroups = new TreeMap<Long, LogGroup>();
    }

    // A headMap can only shrink, so compare in this direction; the original
    // comparison was inverted and this message could never be logged.
    if (tailGroups.size() > usefulGroups.size()) {
      LOG.debug("Excluded " + (tailGroups.size() - usefulGroups.size()) +
          " groups of logs because they start with a txid higher than max " +
          "start txid " + maxStartTxId);
    }

    // Walk the groups in txid order, verifying there are no gaps.
    for (Map.Entry<Long, LogGroup> entry : usefulGroups.entrySet()) {
      long logStartTxId = entry.getKey();
      LogGroup logGroup = entry.getValue();
      logGroup.planRecovery();

      if (expectedTxId != HdfsConstants.INVALID_TXID && logStartTxId != expectedTxId) {
        throw new IOException("Expected next log group would start at txid " +
            expectedTxId + " but starts at txid " + logStartTxId);
      }

      // We can pick any of the non-corrupt logs here
      recoveryLogs.add(logGroup.getBestNonCorruptLog());

      // If this log group was finalized, we know to expect the next
      // log group to start at the following txid (ie no gaps)
      if (logGroup.hasKnownLastTxId()) {
        expectedTxId = logGroup.getLastTxId() + 1;
      } else {
        // the log group was in-progress so we don't know what ID
        // the next group should start from.
        expectedTxId = HdfsConstants.INVALID_TXID;
      }
    }

    // Sanity-check against seen_txid: if some directory claims to have seen
    // a txid newer than any log we can actually load, data would be lost.
    long lastLogGroupStartTxId = usefulGroups.isEmpty() ?
        0 : usefulGroups.lastKey();
    if (maxSeenTxId > sinceTxId &&
        maxSeenTxId > lastLogGroupStartTxId) {
      String msg = "At least one storage directory indicated it has seen a " +
        "log segment starting at txid " + maxSeenTxId;
      if (usefulGroups.isEmpty()) {
        msg += " but there are no logs to load.";
      } else {
        msg += " but the most recent log file found starts with txid " +
          lastLogGroupStartTxId;
      }
      throw new IOException(msg);
    }

    return new LogLoadPlan(recoveryLogs,
        Lists.newArrayList(usefulGroups.values()));
  }

  @Override
  public boolean needToSave() {
    return needToSave;
  }

  /**
   * A group of logs that all start at the same txid.
   *
   * Handles determining which logs are corrupt and which should be considered
   * candidates for loading.
   */
  static class LogGroup {
    long startTxId;
    List<EditLogFile> logs = new ArrayList<EditLogFile>();
    // Distinct ending txids among finalized logs; more than one entry means
    // the directories rolled inconsistently (see checkConsistentEndTxIds).
    private Set<Long> endTxIds = new TreeSet<Long>();

    private boolean hasInProgress = false;
    private boolean hasFinalized = false;

    LogGroup(long startTxId) {
      this.startTxId = startTxId;
    }

    /**
     * @return a non-corrupt log to load, preferring finalized logs over
     * in-progress ones
     * @throws IllegalStateException if every log in the group is corrupt
     *         (cannot happen after a successful planRecovery())
     */
    EditLogFile getBestNonCorruptLog() {
      // First look for non-corrupt finalized logs
      for (EditLogFile log : logs) {
        if (!log.isCorrupt() && !log.isInProgress()) {
          return log;
        }
      }
      // Then look for non-corrupt in-progress logs
      for (EditLogFile log : logs) {
        if (!log.isCorrupt()) {
          return log;
        }
      }

      // We should never get here, because we don't get to the planning stage
      // without calling planRecovery first, and if we've called planRecovery,
      // we would have already thrown if there were no non-corrupt logs!
      throw new IllegalStateException(
        "No non-corrupt logs for txid " + startTxId);
    }

    /**
     * @return true if we can determine the last txid in this log group.
     */
    boolean hasKnownLastTxId() {
      for (EditLogFile log : logs) {
        if (!log.isInProgress()) {
          return true;
        }
      }
      return false;
    }

    /**
     * @return the last txid included in the logs in this group
     * @throws IllegalStateException if it is unknown -
     *                               {@see #hasKnownLastTxId()}
     */
    long getLastTxId() {
      for (EditLogFile log : logs) {
        if (!log.isInProgress()) {
          return log.getLastTxId();
        }
      }
      throw new IllegalStateException("LogGroup only has in-progress logs");
    }

    /** Add a log to the group; it must start at this group's txid. */
    void add(EditLogFile log) {
      assert log.getFirstTxId() == startTxId;
      logs.add(log);

      if (log.isInProgress()) {
        hasInProgress = true;
      } else {
        hasFinalized = true;
        endTxIds.add(log.getLastTxId());
      }
    }

    /**
     * Decide which logs in the group are corrupt, marking them so that
     * {@link #recover()} can later move them aside.
     *
     * @throws IOException if finalized logs in the group disagree on their
     *         ending txid
     */
    void planRecovery() throws IOException {
      assert hasInProgress || hasFinalized;

      checkConsistentEndTxIds();

      if (hasFinalized && hasInProgress) {
        planMixedLogRecovery();
      } else if (!hasFinalized && hasInProgress) {
        planAllInProgressRecovery();
      } else if (hasFinalized && !hasInProgress) {
        LOG.debug("No recovery necessary for logs starting at txid " +
                  startTxId);
      }
    }

    /**
     * Recovery case for when some logs in the group were in-progress, and
     * others were finalized. This happens when one of the storage
     * directories fails.
     *
     * The in-progress logs in this case should be considered corrupt.
     */
    private void planMixedLogRecovery() throws IOException {
      for (EditLogFile log : logs) {
        if (log.isInProgress()) {
          LOG.warn("Log at " + log.getFile() + " is in progress, but " +
                   "other logs starting at the same txid " + startTxId +
                   " are finalized. Moving aside.");
          log.markCorrupt();
        }
      }
    }

    /**
     * Recovery case for when all of the logs in the group were in progress.
     * This happens if the NN completely crashes and restarts. In this case
     * we check the non-zero lengths of each log file, and any logs that are
     * less than the max of these lengths are considered corrupt.
     */
    private void planAllInProgressRecovery() throws IOException {
      // We only have in-progress logs. We need to figure out which logs have
      // the latest data to recover them.
      LOG.warn("Logs beginning at txid " + startTxId + " are all " +
               "in-progress (probably truncated due to a previous NameNode " +
               "crash)");
      if (logs.size() == 1) {
        // Only one log, it's our only choice!
        EditLogFile log = logs.get(0);
        if (log.validateLog().numTransactions == 0) {
          // If it has no transactions, we should consider it corrupt just
          // to be conservative.
          // See comment below for similar case
          LOG.warn("Marking log at " + log.getFile() + " as corrupt since " +
              "it has no transactions in it.");
          log.markCorrupt();
        }
        return;
      }

      // Validate each log exactly once, remembering its valid txn count —
      // validateLog() scans the file, so repeated calls are expensive.
      long[] txnCounts = new long[logs.size()];
      long maxValidTxnCount = Long.MIN_VALUE;
      for (int i = 0; i < logs.size(); i++) {
        EditLogFile log = logs.get(i);
        long validTxnCount = log.validateLog().numTransactions;
        LOG.warn(" Log " + log.getFile() +
            " valid txns=" + validTxnCount +
            " valid len=" + log.validateLog().validLength);
        txnCounts[i] = validTxnCount;
        maxValidTxnCount = Math.max(maxValidTxnCount, validTxnCount);
      }

      for (int i = 0; i < logs.size(); i++) {
        EditLogFile log = logs.get(i);
        long txns = txnCounts[i];
        if (txns < maxValidTxnCount) {
          LOG.warn("Marking log at " + log.getFile() + " as corrupt since " +
                   "it has only " + txns + " valid txns whereas another " +
                   "log has " + maxValidTxnCount);
          log.markCorrupt();
        } else if (txns == 0) {
          // this can happen if the NN crashes right after rolling a log
          // but before the START_LOG_SEGMENT txn is written. Since the log
          // is empty, we can just move it aside to its corrupt name.
          LOG.warn("Marking log at " + log.getFile() + " as corrupt since " +
              "it has no transactions in it.");
          log.markCorrupt();
        }
      }
    }

    /**
     * Check for the case when we have multiple finalized logs and they have
     * different ending transaction IDs. This violates an invariant that all
     * log directories should roll together. We should abort in this case.
     */
    private void checkConsistentEndTxIds() throws IOException {
      if (hasFinalized && endTxIds.size() > 1) {
        throw new IOException("More than one ending txid was found " +
            "for logs starting at txid " + startTxId + ". " +
            "Found: " + StringUtils.join(endTxIds, ','));
      }
    }

    /**
     * Carry out the plan made by {@link #planRecovery()}: move aside corrupt
     * logs and finalize any surviving in-progress ones.
     */
    void recover() throws IOException {
      for (EditLogFile log : logs) {
        if (log.isCorrupt()) {
          log.moveAsideCorruptFile();
        } else if (log.isInProgress()) {
          log.finalizeLog();
        }
      }
    }
  }

  /**
   * A load plan consisting of one image checkpoint plus the log segments
   * needed to roll forward from it.
   */
  static class TransactionalLoadPlan extends LoadPlan {
    final FSImageFile image;
    final LogLoadPlan logPlan;

    public TransactionalLoadPlan(FSImageFile image,
        LogLoadPlan logPlan) {
      super();
      this.image = image;
      this.logPlan = logPlan;
    }

    @Override
    boolean doRecovery() throws IOException {
      logPlan.doRecovery();
      // Recovery here never invalidates the chosen image.
      return false;
    }

    @Override
    File getImageFile() {
      return image.getFile();
    }

    @Override
    List<File> getEditsFiles() {
      return logPlan.getEditsFiles();
    }

    @Override
    StorageDirectory getStorageDirectoryForProperties() {
      return image.sd;
    }
  }

  /**
   * The edit-log half of a load plan: the chosen log file per group, plus
   * the groups whose corrupt/in-progress members still need recovery.
   */
  static class LogLoadPlan {
    final List<EditLogFile> editLogs;
    final List<LogGroup> logGroupsToRecover;

    LogLoadPlan(List<EditLogFile> editLogs,
        List<LogGroup> logGroupsToRecover) {
      this.editLogs = editLogs;
      this.logGroupsToRecover = logGroupsToRecover;
    }

    public void doRecovery() throws IOException {
      for (LogGroup g : logGroupsToRecover) {
        g.recover();
      }
    }

    public List<File> getEditsFiles() {
      List<File> ret = new ArrayList<File>();
      for (EditLogFile log : editLogs) {
        ret.add(log.getFile());
      }
      return ret;
    }
  }
}