HADOOP-16769. LocalDirAllocator to provide diagnostics when file creation fails (#4842)
The patch provides detailed diagnostics when file creation fails in LocalDirAllocator.
Contributed by: Ashutosh Gupta
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
index f6c9d3c..774e015 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
@@ -396,6 +396,10 @@
Context ctx = confChanged(conf);
int numDirs = ctx.localDirs.length;
int numDirsSearched = 0;
+ // Max capacity in any directory
+ long maxCapacity = 0;
+ String errorText = null;
+ IOException diskException = null;
//remove the leading slash from the path (to make sure that the uri
//resolution results in a valid path on the dir being checked)
if (pathStr.startsWith("/")) {
@@ -444,9 +448,18 @@
int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
while (numDirsSearched < numDirs) {
long capacity = ctx.dirDF[dirNum].getAvailable();
+ if (capacity > maxCapacity) {
+ maxCapacity = capacity;
+ }
if (capacity > size) {
- returnPath =
- createPath(ctx.localDirs[dirNum], pathStr, checkWrite);
+ try {
+ returnPath = createPath(ctx.localDirs[dirNum], pathStr,
+ checkWrite);
+ } catch (IOException e) {
+ errorText = e.getMessage();
+ diskException = e;
+ LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e);
+ }
if (returnPath != null) {
ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
break;
@@ -462,8 +475,13 @@
}
//no path found
- throw new DiskErrorException("Could not find any valid local " +
- "directory for " + pathStr);
+ String newErrorText = "Could not find any valid local directory for " +
+ pathStr + " with requested size " + size +
+ " as the max capacity in any directory is " + maxCapacity;
+ if (errorText != null) {
+ newErrorText = newErrorText + " due to " + errorText;
+ }
+ throw new DiskErrorException(newErrorText, diskException);
}
/** Creates a file on the local FS. Pass size as
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
index acda898..939881f39 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
@@ -26,6 +26,7 @@
import java.util.NoSuchElementException;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.Shell;
@@ -532,4 +533,20 @@
}
}
+ /**
+ * Verify that LocalDirAllocator provides detailed diagnostics when file creation
+ * fails because no local directory has sufficient capacity.
+ *
+ * @throws Exception if the test fails unexpectedly.
+ @Test(timeout = 30000)
+ public void testGetLocalPathForWriteForLessSpace() throws Exception {
+ String dir0 = buildBufferDir(ROOT, 0);
+ String dir1 = buildBufferDir(ROOT, 1);
+ conf.set(CONTEXT, dir0 + "," + dir1);
+ LambdaTestUtils.intercept(DiskErrorException.class,
+ String.format("Could not find any valid local directory for %s with requested size %s",
+ "p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.",
+ () -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf));
+ }
}
+