/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.cli.TestHDFSCLI;
import org.apache.hadoop.cli.util.CommandExecutor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
/**
 * Startup and checkpoint tests: verify that failed storage directories
 * are removed from service and restored again at checkpoint time.
 */
public class TestStorageRestore extends TestCase {
public static final String NAME_NODE_HOST = "localhost:";
public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:";
private static final Log LOG =
LogFactory.getLog(TestStorageRestore.class.getName());
private Configuration config;
  private File hdfsDir = null;
static final long seed = 0xAAAAEEFL;
static final int blockSize = 4096;
static final int fileSize = 8192;
private File path1, path2, path3;
private MiniDFSCluster cluster;
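  /**
   * Write a file of {@code fileSize} random bytes at the given path with the
   * given replication factor.
   */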
private void writeFile(FileSystem fileSys, Path name, int repl)
throws IOException {
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, (long)blockSize);
byte[] buffer = new byte[fileSize];
Random rand = new Random(seed);
rand.nextBytes(buffer);
stm.write(buffer);
stm.close();
}
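  /**
   * Configure two image+edits directories (name1, name2) and one edits-only
   * directory (name3), with the restore-failed-storage feature enabled.
   */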
protected void setUp() throws Exception {
config = new HdfsConfiguration();
hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile();
    if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) {
throw new IOException("Could not delete hdfs directory '" + hdfsDir + "'");
}
hdfsDir.mkdirs();
path1 = new File(hdfsDir, "name1");
path2 = new File(hdfsDir, "name2");
path3 = new File(hdfsDir, "name3");
path1.mkdir(); path2.mkdir(); path3.mkdir();
    if(!path1.exists() || !path2.exists() || !path3.exists()) {
      throw new IOException("Couldn't create dfs.name dirs in " + hdfsDir.getAbsolutePath());
    }
    String dfsNameDir = path1.getPath() + "," + path2.getPath();
    System.out.println("configuring hdfsdir " + hdfsDir.getAbsolutePath() +
        "; dfs_name_dir = " + dfsNameDir + "; dfs_name_edits_dir(only) = " + path3.getPath());
    config.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, dfsNameDir);
    config.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, dfsNameDir + "," + path3.getPath());
    config.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY, new File(hdfsDir, "secondary").getPath());
    FileSystem.setDefaultUri(config, "hdfs://" + NAME_NODE_HOST + "0");
config.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
// set the restore feature on
config.setBoolean(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY, true);
}
/**
* clean up
*/
public void tearDown() throws Exception {
if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
throw new IOException("Could not delete hdfs directory in tearDown '" + hdfsDir + "'");
}
}
  /**
   * Invalidate storage by reporting an I/O error on the name2 and name3
   * storage directories, removing them from service.
   */
public void invalidateStorage(FSImage fi) throws IOException {
ArrayList<StorageDirectory> al = new ArrayList<StorageDirectory>(2);
Iterator<StorageDirectory> it = fi.dirIterator();
while(it.hasNext()) {
StorageDirectory sd = it.next();
if(sd.getRoot().equals(path2) || sd.getRoot().equals(path3)) {
al.add(sd);
}
}
// simulate an error
fi.processIOError(al, true);
}
  /**
   * Log the image and edits files in each storage directory and their sizes.
   */
public void printStorages(FSImage fs) {
LOG.info("current storages and corresoponding sizes:");
for(Iterator<StorageDirectory> it = fs.dirIterator(); it.hasNext(); ) {
StorageDirectory sd = it.next();
if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
File imf = FSImage.getImageFile(sd, NameNodeFile.IMAGE);
LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length());
}
if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
File edf = FSImage.getImageFile(sd, NameNodeFile.EDITS);
LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length());
}
}
}
  /**
   * Returns the MD5 hash of a file as a 32-character hex string.
   *
   * @param file input file
   * @return the MD5 hex string
   */
  public String getFileMD5(File file) throws Exception {
    MessageDigest mD = MessageDigest.getInstance("MD5");
    DataInputStream dis = new DataInputStream(new FileInputStream(file));
    try {
      while(true) {
        mD.update(dis.readByte());
      }
    } catch (EOFException eof) {
      // end of file reached: the digest now covers the whole file
    } finally {
      dis.close();
    }
    // pad with leading zeros so the result is always 32 hex characters
    return String.format("%032x", new BigInteger(1, mD.digest()));
  }
/**
* read currentCheckpointTime directly from the file
* @param currDir
* @return the checkpoint time
* @throws IOException
*/
long readCheckpointTime(File currDir) throws IOException {
File timeFile = new File(currDir, NameNodeFile.TIME.getName());
long timeStamp = 0L;
if (timeFile.exists() && timeFile.canRead()) {
DataInputStream in = new DataInputStream(new FileInputStream(timeFile));
try {
timeStamp = in.readLong();
} finally {
in.close();
}
}
return timeStamp;
}
  /**
   * Check whether the image and edits files across the storage directories
   * are consistent (valid == true) or have diverged (valid == false).
   * @throws IOException
   */
public void checkFiles(boolean valid) throws IOException {
//look at the valid storage
File fsImg1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName());
File fsImg2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName());
File fsImg3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName());
File fsEdits1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName());
File fsEdits2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName());
File fsEdits3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName());
long chkPt1 = readCheckpointTime(new File(path1, Storage.STORAGE_DIR_CURRENT));
long chkPt2 = readCheckpointTime(new File(path2, Storage.STORAGE_DIR_CURRENT));
long chkPt3 = readCheckpointTime(new File(path3, Storage.STORAGE_DIR_CURRENT));
    String md5_1 = null, md5_2 = null, md5_3 = null;
try {
md5_1 = getFileMD5(fsEdits1);
md5_2 = getFileMD5(fsEdits2);
md5_3 = getFileMD5(fsEdits3);
} catch (Exception e) {
System.err.println("md 5 calculation failed:" + e.getLocalizedMessage());
}
this.printStorages(cluster.getNameNode().getFSImage());
LOG.info("++++ image files = "+fsImg1.getAbsolutePath() + "," + fsImg2.getAbsolutePath() + ","+ fsImg3.getAbsolutePath());
LOG.info("++++ edits files = "+fsEdits1.getAbsolutePath() + "," + fsEdits2.getAbsolutePath() + ","+ fsEdits3.getAbsolutePath());
LOG.info("checkFiles compares lengths: img1=" + fsImg1.length() + ",img2=" + fsImg2.length() + ",img3=" + fsImg3.length());
LOG.info("checkFiles compares lengths: edits1=" + fsEdits1.length() + ",edits2=" + fsEdits2.length() + ",edits3=" + fsEdits3.length());
LOG.info("checkFiles compares chkPts: name1=" + chkPt1 + ",name2=" + chkPt2 + ",name3=" + chkPt3);
LOG.info("checkFiles compares md5s: " + fsEdits1.getAbsolutePath() +
"="+ md5_1 + "," + fsEdits2.getAbsolutePath() + "=" + md5_2 + "," +
fsEdits3.getAbsolutePath() + "=" + md5_3);
    if(valid) {
      // should be the same
      assertEquals(fsImg1.length(), fsImg2.length());
      assertEquals(0, fsImg3.length()); // image shouldn't be created in the edits-only dir
      assertEquals(fsEdits1.length(), fsEdits2.length());
      assertEquals(fsEdits1.length(), fsEdits3.length());
      assertEquals(md5_1, md5_2);
      assertEquals(md5_1, md5_3);
      // checkpoint times
      assertEquals(chkPt1, chkPt2);
      assertEquals(chkPt1, chkPt3);
} else {
      // should be different
      //assertTrue(fsImg1.length() != fsImg2.length());
      //assertTrue(fsImg1.length() != fsImg3.length());
      assertTrue("edits1 and edits2 should differ", fsEdits1.length() != fsEdits2.length());
      assertTrue("edits1 and edits3 should differ", fsEdits1.length() != fsEdits3.length());
      assertFalse(md5_1.equals(md5_2));
      assertFalse(md5_1.equals(md5_3));
// checkpoint times
assertTrue(chkPt1 > chkPt2);
assertTrue(chkPt1 > chkPt3);
}
}
  /**
   * Test storage restore:
   * 1. create a DFS cluster with 3 storage directories - 2 EDITS_IMAGE, 1 EDITS
   * 2. write a file
   * 3. disable two of the storages by simulating an I/O error on them
   * 4. write another file
   * 5. check that edits and fsimage differ across the storage directories
   * 6. run doCheckpoint - it should restore the failed storage directories
   * 7. write a third file and verify that all the image and edits files are
   *    the same again
   */
@SuppressWarnings("deprecation")
public void testStorageRestore() throws Exception {
int numDatanodes = 2;
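    // manageNameDfsDirs (5th argument) is presumably false here so the cluster
    // keeps the name/edits directories configured in setUp() rather than
    // managing its own defaults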
cluster = new MiniDFSCluster(0, config, numDatanodes, true, false, true, null, null, null, null);
cluster.waitActive();
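    // a SecondaryNameNode drives the checkpoints that later restore the
    // failed storage directories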
SecondaryNameNode secondary = new SecondaryNameNode(config);
System.out.println("****testStorageRestore: Cluster and SNN started");
printStorages(cluster.getNameNode().getFSImage());
FileSystem fs = cluster.getFileSystem();
Path path = new Path("/", "test");
writeFile(fs, path, 2);
System.out.println("****testStorageRestore: file test written, invalidating storage...");
invalidateStorage(cluster.getNameNode().getFSImage());
//secondary.doCheckpoint(); // this will cause storages to be removed.
printStorages(cluster.getNameNode().getFSImage());
System.out.println("****testStorageRestore: storage invalidated + doCheckpoint");
path = new Path("/", "test1");
writeFile(fs, path, 2);
System.out.println("****testStorageRestore: file test1 written");
    checkFiles(false); // the storage files should have diverged
System.out.println("****testStorageRestore: checkfiles(false) run");
    secondary.doCheckpoint(); // should restore the failed storage directories
checkFiles(true);
System.out.println("****testStorageRestore: second Checkpoint done and checkFiles(true) run");
// verify that all the logs are active
path = new Path("/", "test2");
writeFile(fs, path, 2);
System.out.println("****testStorageRestore: wrote a file and checkFiles(true) run");
checkFiles(true);
secondary.shutdown();
cluster.shutdown();
}
/**
* Test dfsadmin -restoreFailedStorage command
* @throws Exception
*/
public void testDfsAdminCmd() throws Exception {
int numDatanodes = 2;
cluster = new MiniDFSCluster(0, config, numDatanodes, true, false, true, null, null, null, null);
cluster.waitActive();
try {
FSImage fsi = cluster.getNameNode().getFSImage();
      // it is started with dfs.name.dir.restore set to true (in setUp())
boolean restore = fsi.getRestoreFailedStorage();
LOG.info("Restore is " + restore);
      assertTrue(restore);
      // now run DFSAdmin command
String cmd = "-fs NAMENODE -restoreFailedStorage false";
String namenode = config.get(DFSConfigKeys.FS_DEFAULT_NAME_KEY, "file:///");
CommandExecutor executor = new TestHDFSCLI.DFSAdminCmdExecutor(namenode);
executor.executeCommand(cmd);
      restore = fsi.getRestoreFailedStorage();
      LOG.info("After set false call restore is " + restore);
      assertFalse(restore);
// run one more time - to set it to true again
cmd = "-fs NAMENODE -restoreFailedStorage true";
executor.executeCommand(cmd);
      restore = fsi.getRestoreFailedStorage();
      LOG.info("After set true call restore is " + restore);
      assertTrue(restore);
// run one more time - no change in value
cmd = "-fs NAMENODE -restoreFailedStorage check";
CommandExecutor.Result cmdResult = executor.executeCommand(cmd);
restore = fsi.getRestoreFailedStorage();
LOG.info("After check call restore is " + restore);
      assertTrue(restore);
String commandOutput = cmdResult.getCommandOutput();
      commandOutput = commandOutput.trim();
assertTrue(commandOutput.contains("restoreFailedStorage is set to true"));
} finally {
cluster.shutdown();
}
}
}