PARQUET-511: Integer overflow when counting values in column.
This commit fixes an issue that occurs when the number of values in a column (summed over all of its pages) exceeds the range of a 32-bit int. No exception is thrown directly, but the int value counters overflow and the definition level is set incorrectly, leading to a null value being returned during read.
Author: Michal Gorecki <goreckim@amazon.com>
Closes #321 from goreckm/int-overflow and squashes the following commits:
d224815 [Michal Gorecki] enhancing exception message
7334be2 [Michal Gorecki] PARQUET-511: Integer overflow when counting values in column.
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
index 8c2a4bf..3fc327e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
@@ -150,7 +150,7 @@
private int dictionaryId;
private long endOfPageValueCount;
- private int readValues = 0;
+ private long readValues = 0;
private int pageValueCount = 0;
private final PrimitiveConverter converter;
@@ -352,8 +352,8 @@
this.dictionary = null;
}
this.totalValueCount = pageReader.getTotalValueCount();
- if (totalValueCount == 0) {
- throw new ParquetDecodingException("totalValueCount == 0");
+ if (totalValueCount <= 0) {
+ throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
}
consume();
}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
index b0d0d30..f428e85 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
@@ -64,7 +64,7 @@
this.decompressor = decompressor;
this.compressedPages = new LinkedList<DataPage>(compressedPages);
this.compressedDictionaryPage = compressedDictionaryPage;
- int count = 0;
+ long count = 0;
for (DataPage p : compressedPages) {
count += p.getValueCount();
}