| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hdfs.server.namenode; |
| |
| |
| import java.io.DataInputStream; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.math.BigInteger; |
| import java.security.MessageDigest; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.Random; |
| |
| import junit.framework.TestCase; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.cli.CmdFactoryDFS; |
| import org.apache.hadoop.cli.util.CLITestData; |
| import org.apache.hadoop.cli.util.CommandExecutor; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FSDataOutputStream; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.FileUtil; |
| import org.apache.hadoop.fs.Path; |
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
| import org.apache.hadoop.hdfs.MiniDFSCluster; |
| import org.apache.hadoop.hdfs.server.common.Storage; |
| import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; |
| import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType; |
| import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile; |
| |
| |
| /** |
| * Startup and checkpoint tests |
| * |
| */ |
| public class TestStorageRestore extends TestCase { |
| public static final String NAME_NODE_HOST = "localhost:"; |
| public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:"; |
| private static final Log LOG = |
| LogFactory.getLog(TestStorageRestore.class.getName()); |
| private Configuration config; |
  private File hdfsDir = null;
| static final long seed = 0xAAAAEEFL; |
| static final int blockSize = 4096; |
| static final int fileSize = 8192; |
| private File path1, path2, path3; |
| private MiniDFSCluster cluster; |
| |
  /**
   * Create a file of fileSize pseudo-random bytes with the given replication.
   */
  private void writeFile(FileSystem fileSys, Path name, int repl)
    throws IOException {
| FSDataOutputStream stm = fileSys.create(name, true, |
| fileSys.getConf().getInt("io.file.buffer.size", 4096), |
| (short)repl, (long)blockSize); |
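    // fill the file with fileSize bytes of deterministic pseudo-random data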
| byte[] buffer = new byte[fileSize]; |
| Random rand = new Random(seed); |
| rand.nextBytes(buffer); |
| stm.write(buffer); |
| stm.close(); |
| } |
| |
| |
  @Override
  protected void setUp() throws Exception {
| config = new HdfsConfiguration(); |
| String baseDir = System.getProperty("test.build.data", "build/test/data"); |
| |
| hdfsDir = new File(baseDir, "dfs"); |
    if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) {
| throw new IOException("Could not delete hdfs directory '" + hdfsDir + "'"); |
| } |
| |
    hdfsDir.mkdirs();

    // three storage directories: name1 and name2 hold image+edits,
    // name3 is edits-only
    path1 = new File(hdfsDir, "name1");
    path2 = new File(hdfsDir, "name2");
    path3 = new File(hdfsDir, "name3");

    path1.mkdir();
    path2.mkdir();
    path3.mkdir();
    if (!path1.exists() || !path2.exists() || !path3.exists()) {
      throw new IOException("Couldn't create dfs.name dirs in " + hdfsDir.getAbsolutePath());
    }
| |
    String dfsNameDir = path1.getPath() + "," + path2.getPath();
    System.out.println("configuring hdfsdir " + hdfsDir.getAbsolutePath() +
        "; dfs name dirs = " + dfsNameDir + "; edits-only dir = " + path3.getPath());

    config.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, dfsNameDir);
    config.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, dfsNameDir + "," + path3.getPath());
| |
    config.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY,
        new File(hdfsDir, "secondary").getPath());

    FileSystem.setDefaultUri(config, "hdfs://" + NAME_NODE_HOST + "0");

    config.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
| |
| // set the restore feature on |
| config.setBoolean(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY, true); |
| } |
| |
| /** |
| * clean up |
| */ |
  @Override
  public void tearDown() throws Exception {
| if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) { |
| throw new IOException("Could not delete hdfs directory in tearDown '" + hdfsDir + "'"); |
| } |
| } |
| |
| /** |
| * invalidate storage by removing storage directories |
| */ |
| public void invalidateStorage(FSImage fi) throws IOException { |
| ArrayList<StorageDirectory> al = new ArrayList<StorageDirectory>(2); |
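    // collect the storage directories rooted at name2 and name3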
| Iterator<StorageDirectory> it = fi.dirIterator(); |
| while(it.hasNext()) { |
| StorageDirectory sd = it.next(); |
| if(sd.getRoot().getAbsolutePath().equals(path2.getAbsolutePath()) || |
| sd.getRoot().getAbsolutePath().equals(path3.getAbsolutePath())) { |
| al.add(sd); |
| } |
| } |
| // simulate an error |
| fi.processIOError(al, true); |
| } |
| |
| /** |
| * test |
| */ |
| public void printStorages(FSImage fs) { |
| LOG.info("current storages and corresoponding sizes:"); |
| for(Iterator<StorageDirectory> it = fs.dirIterator(); it.hasNext(); ) { |
| StorageDirectory sd = it.next(); |
| |
| if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) { |
| File imf = FSImage.getImageFile(sd, NameNodeFile.IMAGE); |
| LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length()); |
| } |
| if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) { |
| File edf = FSImage.getImageFile(sd, NameNodeFile.EDITS); |
| LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length()); |
| } |
| } |
| } |
| |
| |
| /** |
| * This function returns a md5 hash of a file. |
| * |
| * @param file input file |
| * @return The md5 string |
| */ |
| public String getFileMD5(File file) throws Exception { |
| String res = new String(); |
| MessageDigest mD = MessageDigest.getInstance("MD5"); |
| DataInputStream dis = new DataInputStream(new FileInputStream(file)); |
| |
| try { |
| while(true) { |
| mD.update(dis.readByte()); |
| } |
| } catch (EOFException eof) {} |
| |
| BigInteger bigInt = new BigInteger(1, mD.digest()); |
| res = bigInt.toString(16); |
| dis.close(); |
| |
| return res; |
| } |
| |
| |
| /** |
| * read currentCheckpointTime directly from the file |
| * @param currDir |
| * @return the checkpoint time |
| * @throws IOException |
| */ |
| long readCheckpointTime(File currDir) throws IOException { |
| File timeFile = new File(currDir, NameNodeFile.TIME.getName()); |
| long timeStamp = 0L; |
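    // a missing or unreadable time file reads as checkpoint time 0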
| if (timeFile.exists() && timeFile.canRead()) { |
| DataInputStream in = new DataInputStream(new FileInputStream(timeFile)); |
| try { |
| timeStamp = in.readLong(); |
| } finally { |
| in.close(); |
| } |
| } |
| return timeStamp; |
| } |
| |
| /** |
| * check if files exist/not exist |
| * @throws IOException |
| */ |
| public void checkFiles(boolean valid) throws IOException { |
| //look at the valid storage |
| File fsImg1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName()); |
| File fsImg2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName()); |
| File fsImg3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName()); |
| |
| File fsEdits1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName()); |
| File fsEdits2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName()); |
| File fsEdits3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName()); |
| |
| long chkPt1 = readCheckpointTime(new File(path1, Storage.STORAGE_DIR_CURRENT)); |
| long chkPt2 = readCheckpointTime(new File(path2, Storage.STORAGE_DIR_CURRENT)); |
| long chkPt3 = readCheckpointTime(new File(path3, Storage.STORAGE_DIR_CURRENT)); |
| |
    String md5_1 = null, md5_2 = null, md5_3 = null;
    try {
      md5_1 = getFileMD5(fsEdits1);
      md5_2 = getFileMD5(fsEdits2);
      md5_3 = getFileMD5(fsEdits3);
    } catch (Exception e) {
      System.err.println("MD5 calculation failed: " + e.getLocalizedMessage());
    }
| this.printStorages(cluster.getNameNode().getFSImage()); |
| |
| LOG.info("++++ image files = "+fsImg1.getAbsolutePath() + "," + fsImg2.getAbsolutePath() + ","+ fsImg3.getAbsolutePath()); |
| LOG.info("++++ edits files = "+fsEdits1.getAbsolutePath() + "," + fsEdits2.getAbsolutePath() + ","+ fsEdits3.getAbsolutePath()); |
| LOG.info("checkFiles compares lengths: img1=" + fsImg1.length() + ",img2=" + fsImg2.length() + ",img3=" + fsImg3.length()); |
| LOG.info("checkFiles compares lengths: edits1=" + fsEdits1.length() + ",edits2=" + fsEdits2.length() + ",edits3=" + fsEdits3.length()); |
| LOG.info("checkFiles compares chkPts: name1=" + chkPt1 + ",name2=" + chkPt2 + ",name3=" + chkPt3); |
| LOG.info("checkFiles compares md5s: " + fsEdits1.getAbsolutePath() + |
| "="+ md5_1 + "," + fsEdits2.getAbsolutePath() + "=" + md5_2 + "," + |
| fsEdits3.getAbsolutePath() + "=" + md5_3); |
| |
    if (valid) {
      // all storage directories should be in sync
      assertEquals(fsImg1.length(), fsImg2.length());
      assertEquals(0, fsImg3.length()); // shouldn't be created: name3 is edits-only
      assertEquals(fsEdits1.length(), fsEdits2.length());
      assertEquals(fsEdits1.length(), fsEdits3.length());
      assertEquals(md5_1, md5_2);
      assertEquals(md5_1, md5_3);

      // checkpoint times
      assertEquals(chkPt1, chkPt2);
      assertEquals(chkPt1, chkPt3);
    } else {
      // the failed directories should have fallen behind the good one
      //assertTrue(fsImg1.length() != fsImg2.length());
      //assertTrue(fsImg1.length() != fsImg3.length());
      assertTrue(fsEdits1.length() != fsEdits2.length());
      assertTrue(fsEdits1.length() != fsEdits3.length());

      assertFalse(md5_1.equals(md5_2));
      assertFalse(md5_1.equals(md5_3));

      // checkpoint times
      assertTrue(chkPt1 > chkPt2);
      assertTrue(chkPt1 > chkPt3);
    }
| } |
| |
| /** |
| * test |
| * 1. create DFS cluster with 3 storage directories - 2 EDITS_IMAGE, 1 EDITS |
| * 2. create a cluster and write a file |
| * 3. corrupt/disable one storage (or two) by removing |
| * 4. run doCheckpoint - it will fail on removed dirs (which |
| * will invalidate the storages) |
| * 5. write another file |
| * 6. check that edits and fsimage differ |
| * 7. run doCheckpoint |
| * 8. verify that all the image and edits files are the same. |
| */ |
| @SuppressWarnings("deprecation") |
| public void testStorageRestore() throws Exception { |
| int numDatanodes = 2; |
| cluster = new MiniDFSCluster(0, config, numDatanodes, true, false, true, null, null, null, null); |
| cluster.waitActive(); |
| |
| SecondaryNameNode secondary = new SecondaryNameNode(config); |
| System.out.println("****testStorageRestore: Cluster and SNN started"); |
| printStorages(cluster.getNameNode().getFSImage()); |
| |
| FileSystem fs = cluster.getFileSystem(); |
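    // write a file so the edits logs have content that can later diverge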
| Path path = new Path("/", "test"); |
| writeFile(fs, path, 2); |
| |
| System.out.println("****testStorageRestore: file test written, invalidating storage..."); |
| |
    invalidateStorage(cluster.getNameNode().getFSImage());
    printStorages(cluster.getNameNode().getFSImage());
    System.out.println("****testStorageRestore: storage invalidated");
| |
| path = new Path("/", "test1"); |
| writeFile(fs, path, 2); |
| System.out.println("****testStorageRestore: file test1 written"); |
| |
    checkFiles(false); // the failed storages should have diverged
| |
| System.out.println("****testStorageRestore: checkfiles(false) run"); |
| |
    secondary.doCheckpoint(); // should restore the failed storage directories
| |
| checkFiles(true); |
| System.out.println("****testStorageRestore: second Checkpoint done and checkFiles(true) run"); |
| |
| // verify that all the logs are active |
| path = new Path("/", "test2"); |
| writeFile(fs, path, 2); |
| System.out.println("****testStorageRestore: wrote a file and checkFiles(true) run"); |
| checkFiles(true); |
| |
| secondary.shutdown(); |
| cluster.shutdown(); |
| } |
| |
| /** |
| * Test dfsadmin -restoreFailedStorage command |
| * @throws Exception |
| */ |
| public void testDfsAdminCmd() throws Exception { |
    int numDatanodes = 2;

    cluster = new MiniDFSCluster(0, config, numDatanodes, true, false, true, null, null, null, null);
    cluster.waitActive();
    try {
      FSImage fsi = cluster.getNameNode().getFSImage();
| |
      // the cluster was started with DFS_NAMENODE_NAME_DIR_RESTORE_KEY
      // set to true (in setUp())
      boolean restore = fsi.getRestoreFailedStorage();
      LOG.info("Restore is " + restore);
      assertTrue("restore flag should be true at startup", restore);
| |
      // now run the DFSAdmin command to turn the restore feature off
      String cmd = "-fs NAMENODE -restoreFailedStorage false";
| String namenode = config.get(DFSConfigKeys.FS_DEFAULT_NAME_KEY, "file:///"); |
| CommandExecutor executor = |
| CmdFactoryDFS.getCommandExecutor( |
| new CLITestData.TestCmd(cmd, CLITestData.TestCmd.CommandType.DFSADMIN), |
| namenode); |
| executor.executeCommand(cmd); |
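      // read the flag back from the live FSImage to verify the command took effect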
      restore = fsi.getRestoreFailedStorage();
      assertFalse("After restoreFailedStorage false, restore is " + restore, restore);
| |
| // run one more time - to set it to true again |
| cmd = "-fs NAMENODE -restoreFailedStorage true"; |
| executor.executeCommand(cmd); |
      restore = fsi.getRestoreFailedStorage();
      assertTrue("After restoreFailedStorage true, restore is " + restore, restore);
| |
| // run one more time - no change in value |
| cmd = "-fs NAMENODE -restoreFailedStorage check"; |
| CommandExecutor.Result cmdResult = executor.executeCommand(cmd); |
| restore = fsi.getRestoreFailedStorage(); |
| assertTrue("After check call restore is " + restore, restore); |
      String commandOutput = cmdResult.getCommandOutput().trim();
      assertTrue(commandOutput.contains("restoreFailedStorage is set to true"));
| } finally { |
| cluster.shutdown(); |
| } |
| } |
| } |