PARQUET-1979: bloom_filter_offset is filled if there are no bloom filters (#869)
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 0f57d5d..3a10b1c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -519,7 +519,10 @@
if (columnMetaData.getEncodingStats() != null && columnMetaData.getEncodingStats().hasDictionaryPages()) {
metaData.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset());
}
- metaData.setBloom_filter_offset(columnMetaData.getBloomFilterOffset());
+ long bloomFilterOffset = columnMetaData.getBloomFilterOffset();
+ if (bloomFilterOffset >= 0) {
+ metaData.setBloom_filter_offset(bloomFilterOffset);
+ }
if (columnMetaData.getStatistics() != null && !columnMetaData.getStatistics().isEmpty()) {
metaData.setStatistics(toParquetStatistics(columnMetaData.getStatistics(), this.statisticsTruncateLength));
}
@@ -1452,7 +1455,9 @@
column = buildColumnChunkMetaData(metaData, columnPath,
messageType.getType(columnPath.toArray()).asPrimitiveType(), createdBy);
column.setRowGroupOrdinal(rowGroup.getOrdinal());
- column.setBloomFilterOffset(metaData.bloom_filter_offset);
+ if (metaData.isSetBloom_filter_offset()) {
+ column.setBloomFilterOffset(metaData.getBloom_filter_offset());
+ }
} else { // column encrypted with column key
// Metadata will be decrypted later, if this column is accessed
EncryptionWithColumnKey columnKeyStruct = cryptoMetaData.getENCRYPTION_WITH_COLUMN_KEY();
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 0cb6660..791f9ef 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1180,7 +1180,7 @@
*/
public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
long bloomFilterOffset = meta.getBloomFilterOffset();
- if (0 == bloomFilterOffset) {
+ if (bloomFilterOffset < 0) {
return null;
}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index e816b27..587a241 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -210,7 +210,7 @@
private IndexReference columnIndexReference;
private IndexReference offsetIndexReference;
- private long bloomFilterOffset;
+ private long bloomFilterOffset = -1;
protected ColumnChunkMetaData(ColumnChunkProperties columnChunkProperties) {
this(null, columnChunkProperties);
@@ -332,7 +332,7 @@
}
/**
- * @return the offset to the Bloom filter
+ * @return the offset to the Bloom filter, or {@code -1} if there is no Bloom filter for this column chunk
*/
@Private
public long getBloomFilterOffset() {
@@ -618,7 +618,9 @@
shadowColumnChunkMetaData = parquetMetadataConverter.buildColumnChunkMetaData(metaData, path, primitiveType, createdBy);
this.encodingStats = shadowColumnChunkMetaData.encodingStats;
this.properties = shadowColumnChunkMetaData.properties;
- setBloomFilterOffset(metaData.bloom_filter_offset);
+ if (metaData.isSetBloom_filter_offset()) {
+ setBloomFilterOffset(metaData.getBloom_filter_offset());
+ }
}
@Override
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index aee2dc6..4ffd36b 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -255,6 +255,26 @@
}
@Test
+ public void testBloomFilterOffset() throws IOException {
+ ParquetMetadata origMetaData = createParquetMetaData(null, Encoding.PLAIN);
+ ParquetMetadataConverter converter = new ParquetMetadataConverter();
+
+ // Without a Bloom filter offset: the footer field must stay unset, and converting the footer back must yield a negative offset
+ FileMetaData footer = converter.toParquetMetadata(1, origMetaData);
+ assertFalse(footer.getRow_groups().get(0).getColumns().get(0).getMeta_data().isSetBloom_filter_offset());
+ ParquetMetadata convertedMetaData = converter.fromParquetMetadata(footer);
+ assertTrue(convertedMetaData.getBlocks().get(0).getColumns().get(0).getBloomFilterOffset() < 0);
+
+ // With a Bloom filter offset: the value must be written to the footer and come back unchanged after converting the footer again
+ origMetaData.getBlocks().get(0).getColumns().get(0).setBloomFilterOffset(1234);
+ footer = converter.toParquetMetadata(1, origMetaData);
+ assertTrue(footer.getRow_groups().get(0).getColumns().get(0).getMeta_data().isSetBloom_filter_offset());
+ assertEquals(1234, footer.getRow_groups().get(0).getColumns().get(0).getMeta_data().getBloom_filter_offset());
+ convertedMetaData = converter.fromParquetMetadata(footer);
+ assertEquals(1234, convertedMetaData.getBlocks().get(0).getColumns().get(0).getBloomFilterOffset());
+ }
+
+ @Test
public void testLogicalTypesBackwardCompatibleWithConvertedTypes() {
ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
MessageType expected = Types.buildMessage()