/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType.DATA_NODE;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType.NAME_NODE;
import java.io.File;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.TestParallelImageWrite;
import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getInProgressEditsFileName;
import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getImageFileName;
import org.apache.hadoop.util.StringUtils;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import static org.junit.Assert.*;
/**
 * This test ensures the appropriate response (success or failure) from
 * the system when the system is upgraded under various storage-state and
 * version conditions.
*/
public class TestDFSUpgrade {
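  // The upgraded namespace is expected to end at this transaction ID;
  // checkNameNode() asserts that fsimage_<EXPECTED_TXID> and
  // edits_inprogress_<EXPECTED_TXID + 1> exist after the upgrade.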
private static final int EXPECTED_TXID = 17;
private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName());
private Configuration conf;
private int testCounter = 0;
private MiniDFSCluster cluster = null;
/**
* Writes an INFO log message containing the parameters.
*/
void log(String label, int numDirs) {
LOG.info("============================================================");
LOG.info("***TEST " + (testCounter++) + "*** "
+ label + ":"
+ " numDirs="+numDirs);
}
/**
   * For the namenode, verify that the current and previous directories
   * exist. Verify that previous hasn't been modified by comparing the
   * checksum of all its files with their original checksums. It is assumed
   * that the server has recovered and upgraded.
*/
void checkNameNode(String[] baseDirs, long imageTxId) throws IOException {
for (String baseDir : baseDirs) {
LOG.info("Checking namenode directory " + baseDir);
LOG.info("==== Contents ====:\n " +
Joiner.on(" \n").join(new File(baseDir, "current").list()));
LOG.info("==================");
assertTrue(new File(baseDir,"current").isDirectory());
assertTrue(new File(baseDir,"current/VERSION").isFile());
assertTrue(new File(baseDir,"current/"
+ getInProgressEditsFileName(imageTxId + 1)).isFile());
assertTrue(new File(baseDir,"current/"
+ getImageFileName(imageTxId)).isFile());
assertTrue(new File(baseDir,"current/seen_txid").isFile());
File previous = new File(baseDir, "previous");
assertTrue(previous.isDirectory());
assertEquals(UpgradeUtilities.checksumContents(NAME_NODE, previous),
UpgradeUtilities.checksumMasterNameNodeContents());
}
}
/**
   * For a datanode, for a given block pool, verify that the current and
   * previous directories exist. Verify that previous hasn't been modified
   * by comparing the checksum of all its files with their original
   * checksums. It is assumed that the server has recovered and upgraded.
*/
void checkDataNode(String[] baseDirs, String bpid) throws IOException {
for (int i = 0; i < baseDirs.length; i++) {
File current = new File(baseDirs[i], "current/" + bpid + "/current");
assertEquals(UpgradeUtilities.checksumContents(DATA_NODE, current),
UpgradeUtilities.checksumMasterDataNodeContents());
// block files are placed under <sd>/current/<bpid>/current/finalized
File currentFinalized =
MiniDFSCluster.getFinalizedDir(new File(baseDirs[i]), bpid);
assertEquals(UpgradeUtilities.checksumContents(DATA_NODE, currentFinalized),
UpgradeUtilities.checksumMasterBlockPoolFinalizedContents());
File previous = new File(baseDirs[i], "current/" + bpid + "/previous");
assertTrue(previous.isDirectory());
assertEquals(UpgradeUtilities.checksumContents(DATA_NODE, previous),
UpgradeUtilities.checksumMasterDataNodeContents());
File previousFinalized =
new File(baseDirs[i], "current/" + bpid + "/previous"+"/finalized");
assertEquals(UpgradeUtilities.checksumContents(DATA_NODE, previousFinalized),
UpgradeUtilities.checksumMasterBlockPoolFinalizedContents());
}
}
/**
* Attempts to start a NameNode with the given operation. Starting
* the NameNode should throw an exception.
*/
void startNameNodeShouldFail(StartupOption operation) {
startNameNodeShouldFail(operation, null, null);
}
/**
* Attempts to start a NameNode with the given operation. Starting
* the NameNode should throw an exception.
* @param operation - NameNode startup operation
* @param exceptionClass - if non-null, will check that the caught exception
* is assignment-compatible with exceptionClass
* @param messagePattern - if non-null, will check that a substring of the
* message from the caught exception matches this pattern, via the
   * {@link java.util.regex.Matcher#find()} method.
*/
void startNameNodeShouldFail(StartupOption operation,
Class<? extends Exception> exceptionClass, Pattern messagePattern) {
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.startupOption(operation)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
.build(); // should fail
fail("NameNode should have failed to start");
} catch (Exception e) {
// expect exception
if (exceptionClass != null) {
assertTrue("Caught exception is not of expected class "
+ exceptionClass.getSimpleName() + ": "
+ StringUtils.stringifyException(e),
exceptionClass.isInstance(e));
}
if (messagePattern != null) {
assertTrue("Caught exception message string does not match expected pattern \""
+ messagePattern.pattern() + "\" : "
+ StringUtils.stringifyException(e),
messagePattern.matcher(e.getMessage()).find());
}
LOG.info("Successfully detected expected NameNode startup failure.");
}
}
/**
* Attempts to start a DataNode with the given operation. Starting
* the given block pool should fail.
* @param operation startup option
* @param bpid block pool Id that should fail to start
* @throws IOException
*/
void startBlockPoolShouldFail(StartupOption operation, String bpid) throws IOException {
cluster.startDataNodes(conf, 1, false, operation, null); // should fail
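    // The DataNode process itself comes up; only the BPOfferService for
    // this block pool is expected to fail, hence the liveness assertion
    // below rather than expecting an exception.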
assertFalse("Block pool " + bpid + " should have failed to start",
cluster.getDataNodes().get(0).isBPServiceAlive(bpid));
}
/**
   * Create an instance of a newly configured cluster for testing that does
   * not manage its own directories or files.
*/
private MiniDFSCluster createCluster() throws IOException {
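    // format(false) and manage*DfsDirs(false) preserve the storage
    // directories pre-populated by UpgradeUtilities, so the UPGRADE start
    // actually exercises the existing on-disk state.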
return new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
.startupOption(StartupOption.UPGRADE)
.build();
}
@BeforeClass
public static void initialize() throws Exception {
UpgradeUtilities.initialize();
}
/**
* This test attempts to upgrade the NameNode and DataNode under
* a number of valid and invalid conditions.
*/
@Test
public void testUpgrade() throws Exception {
File[] baseDirs;
StorageInfo storageInfo = null;
for (int numDirs = 1; numDirs <= 2; numDirs++) {
conf = new HdfsConfiguration();
conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
String[] nameNodeDirs = conf.getStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
String[] dataNodeDirs = conf.getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
log("Normal NameNode upgrade", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
checkNameNode(nameNodeDirs, EXPECTED_TXID);
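      // Comparing images across storage directories is only meaningful
      // when more than one directory was written.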
if (numDirs > 1)
TestParallelImageWrite.checkImages(cluster.getNamesystem(), numDirs);
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("Normal DataNode upgrade", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
cluster.startDataNodes(conf, 1, false, StartupOption.REGULAR, null);
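      // getCurrentBlockPoolID(null) returns the block pool ID recorded by
      // UpgradeUtilities.initialize() rather than querying a live cluster.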
checkDataNode(dataNodeDirs, UpgradeUtilities.getCurrentBlockPoolID(null));
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
log("NameNode upgrade with existing previous dir", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
startNameNodeShouldFail(StartupOption.UPGRADE);
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("DataNode upgrade with existing previous dir", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "previous");
cluster.startDataNodes(conf, 1, false, StartupOption.REGULAR, null);
checkDataNode(dataNodeDirs, UpgradeUtilities.getCurrentBlockPoolID(null));
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
log("DataNode upgrade with future stored layout version in current", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
baseDirs = UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
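      // HDFS layout versions are negative and decrease as the layout
      // evolves, so Integer.MIN_VALUE stands in for a "future" layout
      // version that this software could never have written.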
storageInfo = new StorageInfo(Integer.MIN_VALUE,
UpgradeUtilities.getCurrentNamespaceID(cluster),
UpgradeUtilities.getCurrentClusterID(cluster),
UpgradeUtilities.getCurrentFsscTime(cluster));
UpgradeUtilities.createDataNodeVersionFile(baseDirs, storageInfo,
UpgradeUtilities.getCurrentBlockPoolID(cluster));
startBlockPoolShouldFail(StartupOption.REGULAR, UpgradeUtilities
.getCurrentBlockPoolID(null));
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
log("DataNode upgrade with newer fsscTime in current", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
baseDirs = UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
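      // An fsscTime of Long.MAX_VALUE makes the datanode storage appear to
      // have been created after the running namespace, so the block pool
      // must refuse to start.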
storageInfo = new StorageInfo(UpgradeUtilities.getCurrentLayoutVersion(),
UpgradeUtilities.getCurrentNamespaceID(cluster),
UpgradeUtilities.getCurrentClusterID(cluster), Long.MAX_VALUE);
UpgradeUtilities.createDataNodeVersionFile(baseDirs, storageInfo,
UpgradeUtilities.getCurrentBlockPoolID(cluster));
      // Ensure the corresponding block pool fails to initialize
startBlockPoolShouldFail(StartupOption.REGULAR, UpgradeUtilities
.getCurrentBlockPoolID(null));
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
log("NameNode upgrade with no edits file", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
deleteStorageFilesWithPrefix(nameNodeDirs, "edits_");
startNameNodeShouldFail(StartupOption.UPGRADE);
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("NameNode upgrade with no image file", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
deleteStorageFilesWithPrefix(nameNodeDirs, "fsimage_");
startNameNodeShouldFail(StartupOption.UPGRADE);
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("NameNode upgrade with corrupt version file", numDirs);
baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
for (File f : baseDirs) {
UpgradeUtilities.corruptFile(
new File(f,"VERSION"),
"layoutVersion".getBytes(Charsets.UTF_8),
"xxxxxxxxxxxxx".getBytes(Charsets.UTF_8));
}
startNameNodeShouldFail(StartupOption.UPGRADE);
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("NameNode upgrade with old layout version in current", numDirs);
baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
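      // Since layout versions are negative, adding 1 yields a version one
      // step older than the oldest upgradable layout, which must be
      // rejected.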
storageInfo = new StorageInfo(Storage.LAST_UPGRADABLE_LAYOUT_VERSION + 1,
UpgradeUtilities.getCurrentNamespaceID(null),
UpgradeUtilities.getCurrentClusterID(null),
UpgradeUtilities.getCurrentFsscTime(null));
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
UpgradeUtilities.getCurrentBlockPoolID(cluster));
startNameNodeShouldFail(StartupOption.UPGRADE);
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("NameNode upgrade with future layout version in current", numDirs);
baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
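      // As in the datanode case above, Integer.MIN_VALUE represents a
      // layout version newer than anything this software supports.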
storageInfo = new StorageInfo(Integer.MIN_VALUE,
UpgradeUtilities.getCurrentNamespaceID(null),
UpgradeUtilities.getCurrentClusterID(null),
UpgradeUtilities.getCurrentFsscTime(null));
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
UpgradeUtilities.getCurrentBlockPoolID(cluster));
startNameNodeShouldFail(StartupOption.UPGRADE);
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
} // end numDir loop
// One more check: normal NN upgrade with 4 directories, concurrent write
int numDirs = 4;
{
conf = new HdfsConfiguration();
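      // A negative scan period disables the datanode block scanner.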
conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
String[] nameNodeDirs = conf.getStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
log("Normal NameNode upgrade", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
checkNameNode(nameNodeDirs, EXPECTED_TXID);
TestParallelImageWrite.checkImages(cluster.getNamesystem(), numDirs);
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
}
}
/*
   * Stand-alone test to detect failure of one storage directory during
   * parallel upgrade. At this time, this can only be done with a manual
   * hack of {@link FSImage#doUpgrade()}.
*/
@Ignore
public void testUpgrade4() throws Exception {
int numDirs = 4;
conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
String[] nameNodeDirs = conf.getStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
log("NameNode upgrade with one bad storage dir", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
try {
// assert("storage dir has been prepared for failure before reaching this point");
startNameNodeShouldFail(StartupOption.UPGRADE, IOException.class,
Pattern.compile("failed in 1 storage"));
} finally {
// assert("storage dir shall be returned to normal state before exiting");
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
}
}
private void deleteStorageFilesWithPrefix(String[] nameNodeDirs, String prefix)
throws Exception {
for (String baseDirStr : nameNodeDirs) {
File baseDir = new File(baseDirStr);
File currentDir = new File(baseDir, "current");
for (File f : currentDir.listFiles()) {
if (f.getName().startsWith(prefix)) {
assertTrue("Deleting " + f, f.delete());
}
}
}
}
@Test(expected=IOException.class)
public void testUpgradeFromPreUpgradeLVFails() throws IOException {
// Upgrade from versions prior to Storage#LAST_UPGRADABLE_LAYOUT_VERSION
// is not allowed
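    // Layout versions are negative, so adding 1 produces a version even
    // older than the last pre-upgrade layout; checkVersionUpgradable must
    // reject it.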
Storage.checkVersionUpgradable(Storage.LAST_PRE_UPGRADE_LAYOUT_VERSION + 1);
fail("Expected IOException is not thrown");
}
@Ignore
public void test203LayoutVersion() {
for (int lv : Storage.LAYOUT_VERSIONS_203) {
assertTrue(Storage.is203LayoutVersion(lv));
}
}
public static void main(String[] args) throws Exception {
TestDFSUpgrade t = new TestDFSUpgrade();
TestDFSUpgrade.initialize();
t.testUpgrade();
}
}