HADOOP-17323. S3A getFileStatus("/") to skip IO (#2479)
Contributed by Mukund Thakur.
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 945a0ad..f2cd2cd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -3144,6 +3144,10 @@
"s3GetFileStatus(%s) wants to know if a directory is empty but"
+ " does not request a list probe", path);
+ if (key.isEmpty() && !needEmptyDirectoryFlag) {
+ return new S3AFileStatus(Tristate.UNKNOWN, path, username);
+ }
+
if (!key.isEmpty() && !key.endsWith("/")
&& probes.contains(StatusProbeEnum.Head)) {
try {
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index 6461ecd..ca8e49c 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -29,6 +29,7 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import org.assertj.core.api.Assertions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -259,6 +260,36 @@
}
@Test
+ public void testCostOfRootFileStatus() throws Throwable {
+ describe("cost of getFileStatus on the root path");
+ Path root = path("/");
+ // flag=false (presumably needEmptyDirectoryFlag): root probe cost expected
+ S3AFileStatus rootStatus = verifyRawInnerGetFileStatus(
+ root,
+ false,
+ StatusProbeEnum.ALL,
+ ROOT_FILE_STATUS_PROBE);
+ Assertions.assertThat(rootStatus.isDirectory())
+ .describedAs("Status returned should be a directory %s", rootStatus)
+ .isTrue();
+ Assertions.assertThat(rootStatus.isEmptyDirectory())
+ .describedAs("Empty directory flag of %s", rootStatus)
+ .isEqualTo(Tristate.UNKNOWN);
+ // flag=true: directory probe cost expected; describe the *new* status
+ rootStatus = verifyRawInnerGetFileStatus(
+ root,
+ true,
+ StatusProbeEnum.ALL,
+ FILE_STATUS_DIR_PROBE);
+ Assertions.assertThat(rootStatus.isDirectory())
+ .describedAs("Status returned should be a directory %s", rootStatus)
+ .isTrue();
+ Assertions.assertThat(rootStatus.isEmptyDirectory())
+ .describedAs("Empty directory flag of %s", rootStatus)
+ .isNotEqualByComparingTo(Tristate.UNKNOWN);
+ }
+
+ @Test
public void testIsDirIsFileMissingPath() throws Throwable {
describe("performing isDir and isFile on a missing file");
Path path = methodPath();
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java
index 0a1438d..8605d7f 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/OperationCost.java
@@ -77,6 +77,11 @@
public static final OperationCost FILE_STATUS_FILE_PROBE = HEAD_OPERATION;
/**
+ * Cost of a root getFileStatus without the empty-directory flag: no IO.
+ */
+ public static final OperationCost ROOT_FILE_STATUS_PROBE = NO_IO;
+
+ /**
* Cost of {@link org.apache.hadoop.fs.s3a.impl.StatusProbeEnum#ALL}.
*/
public static final OperationCost FILE_STATUS_ALL_PROBES =