HADOOP-19204. VectorIO regression: empty ranges are now rejected (#6887)
- restore the old outcome: an empty list of ranges is a no-op
- add tests for the empty-list case
- update the filesystem specification to match
This is a critical fix for vector IO and MUST be cherry-picked to all branches with
that feature.
Contributed by Steve Loughran
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
index 493b8c3..fa04406 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
@@ -294,7 +294,14 @@
final Optional<Long> fileLength) throws EOFException {
requireNonNull(input, "Null input list");
- checkArgument(!input.isEmpty(), "Empty input list");
+
+ if (input.isEmpty()) {
+ // this may seem a pathological case, but it was valid
+ // before and somehow Spark can call it through parquet.
+ LOG.debug("Empty input list");
+ return input;
+ }
+
final List<? extends FileRange> sortedRanges;
if (input.size() == 1) {
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
index 6cbb54e..db844a9 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
@@ -474,7 +474,6 @@
```python
if ranges = null raise NullPointerException
-if ranges.len() = 0 raise IllegalArgumentException
if allocate = null raise NullPointerException
```
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java
index d6a1fb1..aa478f3 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java
@@ -340,6 +340,17 @@
}
}
+ @Test
+ public void testEmptyRanges() throws Exception {
+ List<FileRange> fileRanges = new ArrayList<>();
+ try (FSDataInputStream in = openVectorFile()) {
+ in.readVectored(fileRanges, allocate);
+ Assertions.assertThat(fileRanges)
+ .describedAs("Empty ranges must stay empty")
+ .isEmpty();
+ }
+ }
+
/**
* Test to validate EOF ranges.
* <p>
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java
index 2a29005..3fd3fe4 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java
@@ -702,12 +702,11 @@
}
/**
- * Empty ranges cannot be sorted.
+ * Empty ranges are allowed.
*/
@Test
- public void testEmptyRangesRaisesIllegalArgument() throws Throwable {
- intercept(IllegalArgumentException.class,
- () -> validateAndSortRanges(Collections.emptyList(), Optional.empty()));
+ public void testEmptyRangesAllowed() throws Throwable {
+ validateAndSortRanges(Collections.emptyList(), Optional.empty());
}
/**