/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeTrue;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.concurrent.TimeUnit;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import com.google.common.base.Supplier;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
* Test reporting of DN volume failure counts and metrics.
*/
public class TestDataNodeVolumeFailureReporting {
private static final Log LOG =
LogFactory.getLog(TestDataNodeVolumeFailureReporting.class);
{
GenericTestUtils.setLogLevel(TestDataNodeVolumeFailureReporting.LOG,
Level.ALL);
}
private FileSystem fs;
private MiniDFSCluster cluster;
private Configuration conf;
private String dataDir;
private long volumeCapacity;
// Sleep at least 3 seconds (a 1s heartbeat plus padding) to allow
// for heartbeats to propagate from the datanodes to the namenode.
final int WAIT_FOR_HEARTBEATS = 3000;
// Wait at least (2 * re-check + 10 * heartbeat) seconds for
// a datanode to be considered dead by the namenode.
final int WAIT_FOR_DEATH = 15000;
@Before
public void setUp() throws Exception {
// These tests use DataNodeTestUtils#injectDataDirFailure() to simulate
// volume failures which is currently not supported on Windows.
assumeTrue(!Path.WINDOWS);
// Allow a single volume failure (there are two volumes)
initCluster(1, 2, 1);
}
@After
public void tearDown() throws Exception {
IOUtils.cleanup(LOG, fs);
if (cluster != null) {
cluster.shutdown();
cluster = null;
}
}
/**
* Test that individual volume failures do not cause DNs to fail, that
* all volumes failing on a single datanode does cause it to fail, and
* that capacities and liveness are adjusted correctly by the NN.
*/
@Test
public void testSuccessiveVolumeFailures() throws Exception {
// Bring up two more datanodes
cluster.startDataNodes(conf, 2, true, null, null);
cluster.waitActive();
/*
* Calculate the total capacity of all the datanodes. Sleep for
* three seconds to be sure the datanodes have had a chance to
* heartbeat their capacities.
*/
Thread.sleep(WAIT_FOR_HEARTBEATS);
final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
).getDatanodeManager();
final long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
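// MiniDFSCluster names its storage directories data1, data2, ... under the
// cluster data directory, with consecutive directories belonging to the same
// datanode (two per datanode here), so volume j (1-based) of datanode i
// (0-based) is data(2*i + j).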
File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
File dn3Vol1 = new File(dataDir, "data"+(2*2+1));
File dn3Vol2 = new File(dataDir, "data"+(2*2+2));
/*
* Make the 1st volume directories on the first two datanodes
* inaccessible. We don't fail the 1st volume on all three datanodes
* since that would bring down the entire write pipeline. The client
* does not retry failed nodes, even though a retry might succeed
* when only a single volume has failed.
*/
DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);
/*
* Create file1 and wait for 3 replicas (i.e. all DNs can still
* store a block). Then assert that all DNs are up, despite the
* volume failures.
*/
Path file1 = new Path("/test1");
DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
DFSTestUtil.waitReplication(fs, file1, (short)3);
ArrayList<DataNode> dns = cluster.getDataNodes();
assertTrue("DN1 should be up", dns.get(0).isDatanodeUp());
assertTrue("DN2 should be up", dns.get(1).isDatanodeUp());
assertTrue("DN3 should be up", dns.get(2).isDatanodeUp());
/*
* The metrics should confirm the volume failures.
*/
checkFailuresAtDataNode(dns.get(0), 1, true, dn1Vol1.getAbsolutePath());
checkFailuresAtDataNode(dns.get(1), 1, true, dn2Vol1.getAbsolutePath());
checkFailuresAtDataNode(dns.get(2), 0, true);
// Ensure we wait a sufficient amount of time
assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;
// Eventually the NN should report two volume failures
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(2), true);
/*
* Now fail a volume on the third datanode. We should be able to get
* three replicas since we've already identified the other failures.
*/
DataNodeTestUtils.injectDataDirFailure(dn3Vol1);
Path file2 = new Path("/test2");
DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
DFSTestUtil.waitReplication(fs, file2, (short)3);
assertTrue("DN3 should still be up", dns.get(2).isDatanodeUp());
checkFailuresAtDataNode(dns.get(2), 1, true, dn3Vol1.getAbsolutePath());
DataNodeTestUtils.triggerHeartbeat(dns.get(2));
checkFailuresAtNameNode(dm, dns.get(2), true, dn3Vol1.getAbsolutePath());
/*
* Once the datanodes have had a chance to heartbeat their new capacity,
* the total capacity should be down by three volumes (assuming the host
* did not grow or shrink the data volumes while the test was running).
*/
dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 3,
origCapacity - (3*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 3);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(2), true, dn3Vol1.getAbsolutePath());
/*
* Now fail the 2nd volume on the 3rd datanode. Both of its volumes
* have now failed, so it should report two volume failures and
* take itself out of service. Only wait for two replicas since
* we'll never get a third.
*/
DataNodeTestUtils.injectDataDirFailure(dn3Vol2);
Path file3 = new Path("/test3");
DFSTestUtil.createFile(fs, file3, 1024, (short)3, 1L);
DFSTestUtil.waitReplication(fs, file3, (short)2);
// The DN should consider itself dead
DFSTestUtil.waitForDatanodeDeath(dns.get(2));
// And report two failed volumes
checkFailuresAtDataNode(dns.get(2), 2, true, dn3Vol1.getAbsolutePath(),
dn3Vol2.getAbsolutePath());
// The NN considers the DN dead
DFSTestUtil.waitForDatanodeStatus(dm, 2, 1, 2,
origCapacity - (4*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
/*
* The datanode never tries to restore a failed volume, even if it is
* subsequently repaired, but it does pick the volume up again on
* restart. File creation should therefore succeed once the data
* directories are restored and the datanodes restarted.
*/
DataNodeTestUtils.restoreDataDirFromFailure(
dn1Vol1, dn2Vol1, dn3Vol1, dn3Vol2);
cluster.restartDataNodes();
cluster.waitActive();
Path file4 = new Path("/test4");
DFSTestUtil.createFile(fs, file4, 1024, (short)3, 1L);
DFSTestUtil.waitReplication(fs, file4, (short)3);
/*
* Eventually the capacity should be restored to its original value,
* and the volume failure count should be reported as zero by both
* the metrics and the NN.
*/
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 0, origCapacity,
WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 0);
dns = cluster.getDataNodes();
checkFailuresAtNameNode(dm, dns.get(0), true);
checkFailuresAtNameNode(dm, dns.get(1), true);
checkFailuresAtNameNode(dm, dns.get(2), true);
}
/**
* Test that the NN re-learns volume failures after a restart.
*/
@Test
public void testVolFailureStatsPreservedOnNNRestart() throws Exception {
// Bring up two more datanodes that can tolerate 1 failure
cluster.startDataNodes(conf, 2, true, null, null);
cluster.waitActive();
final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
).getDatanodeManager();
long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
// Fail the first volume on the first two datanodes (the third datanode
// is kept healthy so the write pipeline still has an unaffected node).
File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);
Path file1 = new Path("/test1");
DFSTestUtil.createFile(fs, file1, 1024, (short)2, 1L);
DFSTestUtil.waitReplication(fs, file1, (short)2);
ArrayList<DataNode> dns = cluster.getDataNodes();
// The NN reports two volume failures
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
// After restarting, the NN should still see the two failures
cluster.restartNameNode(0);
cluster.waitActive();
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
}
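/**
* Test reporting when multiple volumes fail on the same datanode while
* staying within the configured failure tolerance.
*/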
@Test
public void testMultipleVolFailuresOnNode() throws Exception {
// Reinitialize the cluster, configured with 4 storage locations per DataNode
// and tolerating up to 2 failures.
tearDown();
initCluster(3, 4, 2);
// Calculate the total capacity of all the datanodes. Sleep for three seconds
// to be sure the datanodes have had a chance to heartbeat their capacities.
Thread.sleep(WAIT_FOR_HEARTBEATS);
DatanodeManager dm = cluster.getNamesystem().getBlockManager()
.getDatanodeManager();
long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
File dn1Vol1 = new File(dataDir, "data"+(4*0+1));
File dn1Vol2 = new File(dataDir, "data"+(4*0+2));
File dn2Vol1 = new File(dataDir, "data"+(4*1+1));
File dn2Vol2 = new File(dataDir, "data"+(4*1+2));
// Make the first two volume directories on the first two datanodes
// non-accessible.
DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn1Vol2, dn2Vol1, dn2Vol2);
// Create file1 and wait for 3 replicas (i.e. all DNs can still store a block).
// Then assert that all DNs are up, despite the volume failures.
Path file1 = new Path("/test1");
DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
DFSTestUtil.waitReplication(fs, file1, (short)3);
// Create an additional file to trigger the failure-based volume check on
// dn1Vol2 and dn2Vol2.
Path file2 = new Path("/test2");
DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
DFSTestUtil.waitReplication(fs, file2, (short)3);
ArrayList<DataNode> dns = cluster.getDataNodes();
assertTrue("DN1 should be up", dns.get(0).isDatanodeUp());
assertTrue("DN2 should be up", dns.get(1).isDatanodeUp());
assertTrue("DN3 should be up", dns.get(2).isDatanodeUp());
checkFailuresAtDataNode(dns.get(0), 1, true, dn1Vol1.getAbsolutePath(),
dn1Vol2.getAbsolutePath());
checkFailuresAtDataNode(dns.get(1), 1, true, dn2Vol1.getAbsolutePath(),
dn2Vol2.getAbsolutePath());
checkFailuresAtDataNode(dns.get(2), 0, true);
// Ensure we wait a sufficient amount of time
assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;
// Eventually the NN should report four volume failures
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 4,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 4);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath(),
dn1Vol2.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath(),
dn2Vol2.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(2), true);
}
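/**
* Test that reconfiguring a datanode's data directories while volumes are
* failed neither clears nor double-counts the reported failures, and that
* restoring the volumes before a reconfiguration clears them.
*/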
@Test
public void testDataNodeReconfigureWithVolumeFailures() throws Exception {
// Bring up two more datanodes
cluster.startDataNodes(conf, 2, true, null, null);
cluster.waitActive();
final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
).getDatanodeManager();
long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
// Fail the first volume on the first two datanodes (the third datanode
// is kept healthy so the write pipeline still has an unaffected node).
File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
File dn1Vol2 = new File(dataDir, "data"+(2*0+2));
File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
File dn2Vol2 = new File(dataDir, "data"+(2*1+2));
DataNodeTestUtils.injectDataDirFailure(dn1Vol1);
DataNodeTestUtils.injectDataDirFailure(dn2Vol1);
Path file1 = new Path("/test1");
DFSTestUtil.createFile(fs, file1, 1024, (short)2, 1L);
DFSTestUtil.waitReplication(fs, file1, (short)2);
ArrayList<DataNode> dns = cluster.getDataNodes();
assertTrue("DN1 should be up", dns.get(0).isDatanodeUp());
assertTrue("DN2 should be up", dns.get(1).isDatanodeUp());
assertTrue("DN3 should be up", dns.get(2).isDatanodeUp());
checkFailuresAtDataNode(dns.get(0), 1, true, dn1Vol1.getAbsolutePath());
checkFailuresAtDataNode(dns.get(1), 1, true, dn2Vol1.getAbsolutePath());
checkFailuresAtDataNode(dns.get(2), 0, true);
// Ensure we wait a sufficient amount of time
assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;
// The NN reports two volume failures
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
// Reconfigure the datanodes with their original data directories to try
// to add back the failed volumes.
DataNodeTestUtils.reconfigureDataNode(dns.get(0), dn1Vol1, dn1Vol2);
DataNodeTestUtils.reconfigureDataNode(dns.get(1), dn2Vol1, dn2Vol2);
DataNodeTestUtils.triggerHeartbeat(dns.get(0));
DataNodeTestUtils.triggerHeartbeat(dns.get(1));
checkFailuresAtDataNode(dns.get(0), 1, true, dn1Vol1.getAbsolutePath());
checkFailuresAtDataNode(dns.get(1), 1, true, dn2Vol1.getAbsolutePath());
// Ensure we wait a sufficient amount of time.
assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;
// The NN reports two volume failures again.
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
// Reconfigure with the still-failed volumes once more. Afterwards, we expect
// the same volume failures to be reported. (No double-counting.)
DataNodeTestUtils.reconfigureDataNode(dns.get(0), dn1Vol1, dn1Vol2);
DataNodeTestUtils.reconfigureDataNode(dns.get(1), dn2Vol1, dn2Vol2);
DataNodeTestUtils.triggerHeartbeat(dns.get(0));
DataNodeTestUtils.triggerHeartbeat(dns.get(1));
checkFailuresAtDataNode(dns.get(0), 1, true, dn1Vol1.getAbsolutePath());
checkFailuresAtDataNode(dns.get(1), 1, true, dn2Vol1.getAbsolutePath());
// Ensure we wait a sufficient amount of time.
assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;
// The NN reports two volume failures again.
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
// Restore the failed volumes to a healthy state and reconfigure the
// DataNodes. The failed volume information should be cleared.
DataNodeTestUtils.restoreDataDirFromFailure(dn1Vol1, dn2Vol1);
DataNodeTestUtils.reconfigureDataNode(dns.get(0), dn1Vol1, dn1Vol2);
DataNodeTestUtils.reconfigureDataNode(dns.get(1), dn2Vol1, dn2Vol2);
DataNodeTestUtils.triggerHeartbeat(dns.get(0));
DataNodeTestUtils.triggerHeartbeat(dns.get(1));
checkFailuresAtDataNode(dns.get(0), 1, true);
checkFailuresAtDataNode(dns.get(1), 1, true);
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 0,
origCapacity, WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 0);
checkFailuresAtNameNode(dm, dns.get(0), true);
checkFailuresAtNameNode(dm, dns.get(1), true);
}
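/**
* Test that the datanode does not auto-format a storage directory that is
* missing its VERSION file, but reports it as a failed volume instead.
*/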
@Test
public void testAutoFormatEmptyDirectory() throws Exception {
// remove the version file
File dn1Vol1 = cluster.getStorageDir(0, 0);
File current = new File(dn1Vol1, "current");
File currentVersion = new File(current, "VERSION");
assertTrue("Failed to delete VERSION file", currentVersion.delete());
// restart the data node
assertTrue(cluster.restartDataNodes(true));
// the DN should tolerate one volume failure.
cluster.waitActive();
ArrayList<DataNode> dns = cluster.getDataNodes();
DataNode dn = dns.get(0);
assertFalse("DataNode should not reformat if VERSION is missing",
currentVersion.exists());
// Make sure DN's JMX sees the failed volume
final String[] expectedFailedVolumes = {dn1Vol1.getAbsolutePath()};
DataNodeTestUtils.triggerHeartbeat(dn);
FsDatasetSpi<?> fsd = dn.getFSDataset();
assertEquals(expectedFailedVolumes.length, fsd.getNumFailedVolumes());
assertArrayEquals(expectedFailedVolumes, fsd.getFailedStorageLocations());
// there shouldn't be any more volume failures due to I/O failure
checkFailuresAtDataNode(dn, 0, false, expectedFailedVolumes);
// The NN reports one volume failure
final DatanodeManager dm = cluster.getNamesystem().getBlockManager().
getDatanodeManager();
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
DFSTestUtil.waitForDatanodeStatus(dm, 1, 0, 1,
(1*dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(false, 1);
checkFailuresAtNameNode(dm, dns.get(0), false, dn1Vol1.getAbsolutePath());
}
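/**
* Test that the datanode does not auto-format a block pool slice directory
* that is missing its VERSION file.
*/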
@Test
public void testAutoFormatEmptyBlockPoolDirectory() throws Exception {
// remove the version file
DataNode dn = cluster.getDataNodes().get(0);
String bpid = cluster.getNamesystem().getBlockPoolId();
BlockPoolSliceStorage bps = dn.getStorage().getBPStorage(bpid);
Storage.StorageDirectory dir = bps.getStorageDir(0);
File current = dir.getCurrentDir();
File currentVersion = new File(current, "VERSION");
assertTrue("Failed to delete VERSION file", currentVersion.delete());
// restart the data node
assertTrue(cluster.restartDataNodes(true));
// the DN should tolerate one volume failure.
cluster.waitActive();
assertFalse("DataNode should not reformat if VERSION is missing",
currentVersion.exists());
}
/**
* Verify DataNode NumFailedVolumes and FailedStorageLocations
* after hot-swapping out a failed volume.
*/
@Test (timeout = 120000)
public void testHotSwapOutFailedVolumeAndReporting()
throws Exception {
LOG.info("Starting testHotSwapOutFailedVolumeAndReporting!");
final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
final DataNode dn0 = cluster.getDataNodes().get(0);
final String oldDataDirs = dn0.getConf().get(
DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
final ObjectName mxbeanName = new ObjectName(
"Hadoop:service=DataNode,name=FSDatasetState-" + dn0.getDatanodeUuid());
int numFailedVolumes = (int) mbs.getAttribute(mxbeanName,
"NumFailedVolumes");
Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(),
numFailedVolumes);
checkFailuresAtDataNode(dn0, 0, false, new String[] {});
// Fail dn0Vol1 first.
// Verify NumFailedVolumes and FailedStorageLocations are updated.
DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
DataNodeTestUtils.waitForDiskError(dn0,
DataNodeTestUtils.getVolume(dn0, dn0Vol1));
numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
Assert.assertEquals(1, numFailedVolumes);
Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(),
numFailedVolumes);
checkFailuresAtDataNode(dn0, 1, true,
new String[] {dn0Vol1.getAbsolutePath()});
// Reconfigure disks without fixing the failed disk.
// Verify NumFailedVolumes and FailedStorageLocations haven't changed.
try {
dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
oldDataDirs);
} catch (ReconfigurationException e) {
Assert.assertTrue("Reconfigure exception doesn't have expected path!",
e.getCause().getMessage().contains(dn0Vol1.getAbsolutePath()));
}
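// Poll JMX (every second, for up to 30 seconds) until the bean again
// reports the single failed volume.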
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
try {
return ((int) mbs.getAttribute(mxbeanName,
"NumFailedVolumes") == 1);
} catch (Exception e) {
return false;
}
}
}, 1000, 30000);
Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(),
numFailedVolumes);
checkFailuresAtDataNode(dn0, 1, true,
new String[] {dn0Vol1.getAbsolutePath()});
// Hot swap out the failed volume.
// Verify NumFailedVolumes and FailedStorageLocations are reset.
String dataDirs = dn0Vol2.getPath();
dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
dataDirs);
numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
Assert.assertEquals(0, numFailedVolumes);
Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(),
numFailedVolumes);
checkFailuresAtDataNode(dn0, 0, true, new String[] {});
// Fix the failed volume dn0Vol1 and add it back.
// Verify NumFailedVolumes and FailedStorageLocations are empty again.
DataNodeTestUtils.restoreDataDirFromFailure(dn0Vol1);
dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
oldDataDirs);
numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
Assert.assertEquals(0, numFailedVolumes);
Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(),
numFailedVolumes);
checkFailuresAtDataNode(dn0, 0, true, new String[] {});
// Fail dn0Vol2.
// Verify NumFailedVolumes and FailedStorageLocations are updated.
DataNodeTestUtils.injectDataDirFailure(dn0Vol2);
DataNodeTestUtils.waitForDiskError(dn0,
DataNodeTestUtils.getVolume(dn0, dn0Vol2));
numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
Assert.assertEquals(1, numFailedVolumes);
Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(),
numFailedVolumes);
checkFailuresAtDataNode(dn0, 1, true,
new String[] {dn0Vol2.getAbsolutePath()});
// Verify the DataNode keeps running, tolerating one disk failure.
assertTrue(dn0.shouldRun());
}
/**
* Checks the NameNode for correct values of aggregate counters tracking failed
* volumes across all DataNodes.
*
* @param expectCapacityKnown if true, then expect that the capacities of the
* volumes were known before the failures, and therefore the lost capacity
* can be reported
* @param expectedVolumeFailuresTotal expected number of failed volumes
*/
private void checkAggregateFailuresAtNameNode(boolean expectCapacityKnown,
int expectedVolumeFailuresTotal) {
FSNamesystem ns = cluster.getNamesystem();
assertEquals(expectedVolumeFailuresTotal, ns.getVolumeFailuresTotal());
long expectedCapacityLost = getExpectedCapacityLost(expectCapacityKnown,
expectedVolumeFailuresTotal);
assertEquals(expectedCapacityLost, ns.getEstimatedCapacityLostTotal());
}
/**
* Checks a DataNode for correct reporting of failed volumes.
*
* @param dn DataNode to check
* @param expectedVolumeFailuresCounter metric counter value for
* VolumeFailures. The current implementation actually counts the number
* of failed disk checker cycles, which may be different from the length of
* expectedFailedVolumes if multiple disks fail in the same disk checker
* cycle
* @param expectCapacityKnown if true, then expect that the capacities of the
* volumes were known before the failures, and therefore the lost capacity
* can be reported
* @param expectedFailedVolumes expected locations of failed volumes
* @throws Exception if there is any failure
*/
private void checkFailuresAtDataNode(DataNode dn,
long expectedVolumeFailuresCounter, boolean expectCapacityKnown,
String... expectedFailedVolumes) throws Exception {
FsDatasetSpi<?> fsd = dn.getFSDataset();
assertEquals(expectedFailedVolumes.length, fsd.getNumFailedVolumes());
assertArrayEquals(expectedFailedVolumes, fsd.getFailedStorageLocations());
if (expectedFailedVolumes.length > 0) {
assertTrue(fsd.getLastVolumeFailureDate() > 0);
long expectedCapacityLost = getExpectedCapacityLost(expectCapacityKnown,
expectedFailedVolumes.length);
assertEquals(expectedCapacityLost, fsd.getEstimatedCapacityLostTotal());
} else {
assertEquals(0, fsd.getLastVolumeFailureDate());
assertEquals(0, fsd.getEstimatedCapacityLostTotal());
}
}
/**
* Checks NameNode tracking of a particular DataNode for correct reporting of
* failed volumes.
*
* @param dm DatanodeManager to check
* @param dn DataNode to check
* @param expectCapacityKnown if true, then expect that the capacities of the
* volumes were known before the failures, and therefore the lost capacity
* can be reported
* @param expectedFailedVolumes expected locations of failed volumes
* @throws Exception if there is any failure
*/
private void checkFailuresAtNameNode(DatanodeManager dm, DataNode dn,
boolean expectCapacityKnown, String... expectedFailedVolumes)
throws Exception {
DatanodeDescriptor dd = cluster.getNamesystem().getBlockManager()
.getDatanodeManager().getDatanode(dn.getDatanodeId());
assertEquals(expectedFailedVolumes.length, dd.getVolumeFailures());
VolumeFailureSummary volumeFailureSummary = dd.getVolumeFailureSummary();
if (expectedFailedVolumes.length > 0) {
assertArrayEquals(expectedFailedVolumes, volumeFailureSummary
.getFailedStorageLocations());
assertTrue(volumeFailureSummary.getLastVolumeFailureDate() > 0);
long expectedCapacityLost = getExpectedCapacityLost(expectCapacityKnown,
expectedFailedVolumes.length);
assertEquals(expectedCapacityLost,
volumeFailureSummary.getEstimatedCapacityLostTotal());
} else {
assertNull(volumeFailureSummary);
}
}
/**
* Returns expected capacity lost for use in assertions. The return value is
* dependent on whether or not it is expected that the volume capacities were
* known prior to the failures.
*
* @param expectCapacityKnown if true, then expect that the capacities of the
* volumes were known before the failures, and therefore the lost capacity
* can be reported
* @param expectedVolumeFailuresTotal expected number of failed volumes
* @return estimated capacity lost in bytes
*/
private long getExpectedCapacityLost(boolean expectCapacityKnown,
int expectedVolumeFailuresTotal) {
return expectCapacityKnown ? expectedVolumeFailuresTotal * volumeCapacity :
0;
}
/**
* Initializes the cluster.
*
* @param numDataNodes number of datanodes
* @param storagesPerDatanode number of storage locations on each datanode
* @param failedVolumesTolerated number of acceptable volume failures
* @throws Exception if there is any failure
*/
private void initCluster(int numDataNodes, int storagesPerDatanode,
int failedVolumesTolerated) throws Exception {
conf = new HdfsConfiguration();
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 512L);
/*
* Lower the DN heartbeat, DF rate, and recheck interval to one second
* so state about failures and datanode death propagates faster.
*/
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_DF_INTERVAL_KEY, 1000);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000);
conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
failedVolumesTolerated);
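// Disable the minimum gap between disk checks so injected volume failures
// are detected as soon as they are exercised.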
conf.setTimeDuration(DFSConfigKeys.DFS_DATANODE_DISK_CHECK_MIN_GAP_KEY,
0, TimeUnit.MILLISECONDS);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes)
.storagesPerDatanode(storagesPerDatanode).build();
cluster.waitActive();
fs = cluster.getFileSystem();
dataDir = cluster.getDataDirectory();
long dnCapacity = DFSTestUtil.getDatanodeCapacity(
cluster.getNamesystem().getBlockManager().getDatanodeManager(), 0);
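// Volumes in MiniDFSCluster are equally sized, so per-volume capacity is
// the datanode capacity divided evenly across its storage locations.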
volumeCapacity = dnCapacity / cluster.getStoragesPerDatanode();
}
}