Core: Ignore split offsets when the last split offset is past the file length (#8860) (#8861)
diff --git a/core/src/main/java/org/apache/iceberg/BaseFile.java b/core/src/main/java/org/apache/iceberg/BaseFile.java
index 793301e..31e9145 100644
--- a/core/src/main/java/org/apache/iceberg/BaseFile.java
+++ b/core/src/main/java/org/apache/iceberg/BaseFile.java
@@ -460,6 +460,16 @@
@Override
public List<Long> splitOffsets() {
+ if (splitOffsets == null || splitOffsets.length == 0) {
+ return null;
+ }
+
+ // If the last split offset is past the file size this means the split offsets are corrupted and
+ // should not be used
+ if (splitOffsets[splitOffsets.length - 1] >= fileSizeInBytes) {
+ return null;
+ }
+
return ArrayUtil.toUnmodifiableLongList(splitOffsets);
}
diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java
index 3922df8..d508459 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -113,7 +113,7 @@
.withFileSizeInBytes(10)
.withPartitionPath("data_bucket=2") // easy way to set partition data for now
.withRecordCount(1)
- .withSplitOffsets(ImmutableList.of(2L, 2_000_000L))
+ .withSplitOffsets(ImmutableList.of(2L, 8L))
.build();
static final DeleteFile FILE_C2_DELETES =
FileMetadata.deleteFileBuilder(SPEC)
@@ -129,7 +129,7 @@
.withFileSizeInBytes(10)
.withPartitionPath("data_bucket=3") // easy way to set partition data for now
.withRecordCount(1)
- .withSplitOffsets(ImmutableList.of(3L, 3_000L, 3_000_000L))
+ .withSplitOffsets(ImmutableList.of(0L, 3L, 6L))
.build();
static final DeleteFile FILE_D2_DELETES =
FileMetadata.deleteFileBuilder(SPEC)
diff --git a/core/src/test/java/org/apache/iceberg/TestManifestReader.java b/core/src/test/java/org/apache/iceberg/TestManifestReader.java
index f1674af..3bb9bf1 100644
--- a/core/src/test/java/org/apache/iceberg/TestManifestReader.java
+++ b/core/src/test/java/org/apache/iceberg/TestManifestReader.java
@@ -25,6 +25,7 @@
import java.util.stream.Collectors;
import org.apache.iceberg.ManifestEntry.Status;
import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Streams;
@@ -159,4 +160,20 @@
}
}
}
+
+ @Test
+ public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException {
+ DataFile invalidOffset =
+ DataFiles.builder(SPEC)
+ .withPath("/path/to/invalid-offsets.parquet")
+ .withFileSizeInBytes(10)
+ .withRecordCount(1)
+ .withSplitOffsets(ImmutableList.of(2L, 1000L)) // Offset 1000 is out of bounds
+ .build();
+ ManifestFile manifest = writeManifest(1000L, invalidOffset);
+ try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)) {
+ DataFile file = Iterables.getOnlyElement(reader);
+ Assertions.assertThat(file.splitOffsets()).isNull();
+ }
+ }
}