Add comments and a log message explaining the truncated page header workaround
diff --git a/parquet-hadoop/src/main/java/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/parquet/hadoop/ParquetFileReader.java index 71a8f74..58d2a8d 100644 --- a/parquet-hadoop/src/main/java/parquet/hadoop/ParquetFileReader.java +++ b/parquet-hadoop/src/main/java/parquet/hadoop/ParquetFileReader.java
@@ -512,8 +512,14 @@ try { pageHeader = Util.readPageHeader(this); } catch (IOException e) { - this.pos = initialPos; - pageHeader = Util.readPageHeader(new SequenceInputStream(this, f)); + // This works around a bug where the compressedLength + // of the chunk is missing the size of the header of the dictionary. + // We need this to allow reading older files (using dictionary). + // Usually 13 to 19 bytes are missing; + // if the last page is smaller than this, the page header itself is truncated in the buffer. + this.pos = initialPos; // reset the buffer to the position before we got the error + LOG.info("completing the column chunk to read the page header"); + pageHeader = Util.readPageHeader(new SequenceInputStream(this, f)); // try again, reading from the buffer followed by the remainder of the stream } return pageHeader; }