HADOOP-16769. LocalDirAllocator to provide diagnostics when file creation fails (#4842)


The patch provides detailed diagnostics of file creation failure in LocalDirAllocator.

Contributed by: Ashutosh Gupta
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
index f6c9d3c..774e015 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
@@ -396,6 +396,10 @@
       Context ctx = confChanged(conf);
       int numDirs = ctx.localDirs.length;
       int numDirsSearched = 0;
+      // Max capacity in any directory
+      long maxCapacity = 0;
+      String errorText = null;
+      IOException diskException = null;
       //remove the leading slash from the path (to make sure that the uri
       //resolution results in a valid path on the dir being checked)
       if (pathStr.startsWith("/")) {
@@ -444,9 +448,18 @@
         int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
         while (numDirsSearched < numDirs) {
           long capacity = ctx.dirDF[dirNum].getAvailable();
+          if (capacity > maxCapacity) {
+            maxCapacity = capacity;
+          }
           if (capacity > size) {
-            returnPath =
-                createPath(ctx.localDirs[dirNum], pathStr, checkWrite);
+            try {
+              returnPath = createPath(ctx.localDirs[dirNum], pathStr,
+                  checkWrite);
+            } catch (IOException e) {
+              errorText = e.getMessage();
+              diskException = e;
+              LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e);
+            }
             if (returnPath != null) {
               ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
               break;
@@ -462,8 +475,13 @@
       }
       
       //no path found
-      throw new DiskErrorException("Could not find any valid local " +
-          "directory for " + pathStr);
+      String newErrorText = "Could not find any valid local directory for " +
+          pathStr + " with requested size " + size +
+          " as the max capacity in any directory is " + maxCapacity;
+      if (errorText != null) {
+        newErrorText = newErrorText + " due to " + errorText;
+      }
+      throw new DiskErrorException(newErrorText, diskException);
     }
 
     /** Creates a file on the local FS. Pass size as 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
index acda898..939881f39 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
@@ -26,6 +26,7 @@
 import java.util.NoSuchElementException;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.Shell;
 
@@ -532,4 +533,20 @@
     }
   }
 
+  /**
+   * Test that LocalDirAllocator's failure diagnostics include the requested size and max capacity.
+   *
+   * @throws Exception if the allocator fails in an unexpected way.
+   */
+  @Test(timeout = 30000)
+  public void testGetLocalPathForWriteForLessSpace() throws Exception {
+    String dir0 = buildBufferDir(ROOT, 0);
+    String dir1 = buildBufferDir(ROOT, 1);
+    conf.set(CONTEXT, dir0 + "," + dir1);
+    LambdaTestUtils.intercept(DiskErrorException.class,
+        String.format("Could not find any valid local directory for %s with requested size %s",
+            "p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.",
+        () -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf));
+  }
 }
+