HDFS-1505. saveNamespace appears to succeed even if all directories fail to save. Contributed by Aaron T. Myers.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/trunk@1104579 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 327959d..45d78bd 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1003,6 +1003,9 @@
HDFS-1621. Fix references to hadoop-common-${version} in build.xml
(Jolly Chen via todd)
+ HDFS-1505. saveNamespace appears to succeed even if all directories fail
+ to save. (Aaron T. Myers via todd)
+
Release 0.21.1 - Unreleased
HDFS-1466. TestFcHdfsSymlink relies on /tmp/test not existing. (eli)
diff --git a/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
index 5722000..d38385c 100644
--- a/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
+++ b/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -913,9 +913,21 @@
errorSDs.add(sd);
}
}
- storage.reportErrorsOnDirectories(errorSDs);
- if(!editLog.isOpen()) editLog.open();
- ckptState = CheckpointStates.UPLOAD_DONE;
+
+ try {
+ storage.reportErrorsOnDirectories(errorSDs);
+
+ // If there was an error in every storage dir, each one will have been
+ // removed from the list of storage directories.
+ if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0 ||
+ storage.getNumStorageDirs(NameNodeDirType.EDITS) == 0) {
+ throw new IOException("Failed to save any storage directories while saving namespace");
+ }
+
+ if(!editLog.isOpen()) editLog.open();
+ } finally {
+ ckptState = CheckpointStates.UPLOAD_DONE;
+ }
}
/**
diff --git a/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
index fefd374..47bd598 100644
--- a/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
+++ b/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
@@ -20,6 +20,7 @@
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doThrow;
@@ -43,6 +44,7 @@
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Test;
+import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
@@ -261,6 +263,65 @@
public void testCrashWhileMoveLastCheckpoint() throws Exception {
saveNamespaceWithInjectedFault(Fault.MOVE_LAST_CHECKPOINT);
}
+
+ @Test
+ public void testFailedSaveNamespace() throws Exception {
+ Configuration conf = getConf();
+ NameNode.initMetrics(conf, NamenodeRole.ACTIVE);
+ DFSTestUtil.formatNameNode(conf);
+ FSNamesystem fsn = new FSNamesystem(conf);
+
+ // Replace the FSImage with a spy
+ final FSImage originalImage = fsn.dir.fsImage;
+ NNStorage storage = originalImage.getStorage();
+ storage.close(); // unlock any directories that FSNamesystem's initialization may have locked
+
+ NNStorage spyStorage = spy(storage);
+ originalImage.storage = spyStorage;
+ FSImage spyImage = spy(originalImage);
+ fsn.dir.fsImage = spyImage;
+ spyImage.storage.setStorageDirectories(
+ FSNamesystem.getNamespaceDirs(conf),
+ FSNamesystem.getNamespaceEditsDirs(conf));
+
+ doThrow(new IOException("Injected fault: saveFSImage")).
+ when(spyImage).saveFSImage((File)anyObject());
+
+ try {
+ doAnEdit(fsn, 1);
+
+ // Save namespace
+ fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+ try {
+ fsn.saveNamespace();
+ fail("saveNamespace did not fail even when all directories failed!");
+ } catch (IOException ioe) {
+ LOG.info("Got expected exception", ioe);
+ }
+
+ // Ensure that, if storage dirs come back online, things work again.
+ Mockito.reset(spyImage);
+ spyStorage.setRestoreFailedStorage(true);
+ fsn.saveNamespace();
+ checkEditExists(fsn, 1);
+
+ // Now shut down and restart the NN
+ originalImage.close();
+ fsn.close();
+ fsn = null;
+
+ // Start a new namesystem, which should be able to recover
+ // the namespace from the previous incarnation.
+ fsn = new FSNamesystem(conf);
+
+ // Make sure the image loaded including our edits.
+ checkEditExists(fsn, 1);
+ } finally {
+ if (fsn != null) {
+ fsn.close();
+ }
+ }
+ }
@Test
public void testSaveWhileEditsRolled() throws Exception {