blob: cb82c5adc7b67102752e626bcd425139a1979a04 [file] [log] [blame] [view]
layout: page
title: Parquet write configuration
nav_order: 17
## Parquet write configurations in Spark/Velox/Gluten
<table class="spark-config">
<thead>
<tr>
<th scope="col">Configuration</th>
<th scope="col">parquet-mr default</th>
<th scope="col">Spark default</th>
<th scope="col">Velox default</th>
<th scope="col">Gluten support</th>
</tr>
</thead>
<tbody>
<tr>
<td colspan="5" align="center">
<hr/>Spark <hr/></td>
</tr>
<tr>
<td><code>spark.sql.parquet.outputTimestampType</code></td>
<td></td><td>int96</td><td></td><td></td>
</tr>
<tr>
<td><code>spark.sql.parquet.writeLegacyFormat</code></td>
<td></td><td>false</td><td></td><td></td>
</tr>
<tr>
<td colspan="5" align="center"> <hr/>Velox/Arrow <hr/></td>
</tr>
<tr>
<td><code>write_batch_size</code></td>
<td></td><td></td><td>1024</td><td>Y (batch size)</td>
</tr>
<tr>
<td><code>rowgroup_length</code></td>
<td></td><td></td><td>1M</td><td></td>
</tr>
<tr>
<td><code>compression_level</code></td>
<td></td><td></td><td>0</td><td></td>
</tr>
<tr>
<td><code>page_index</code></td>
<td></td><td></td><td>false</td><td></td>
</tr>
<tr>
<td><code>decimal_as_integer</code></td>
<td></td><td></td><td>false</td><td></td>
</tr>
<tr>
<td><code>statistics_enabled</code></td>
<td></td><td></td><td>false</td><td></td>
</tr>
<tr>
<td colspan="5" align="center"> <hr/>parquet-mr <hr/></td>
</tr>
<tr>
<td><code>parquet.summary.metadata.level</code></td>
<td>all</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.enable.summary-metadata</code></td>
<td>true</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.block.size</code></td>
<td>128m</td><td></td><td></td><td>Y</td>
</tr>
<tr>
<td><code>parquet.page.size</code></td>
<td>1m</td><td></td><td>1m</td><td>Y</td>
</tr>
<tr>
<td><code>parquet.compression</code></td>
<td>uncompressed</td><td>snappy</td><td>uncompressed</td><td>Y</td>
</tr>
<tr>
<td><code>parquet.write.support.class</code></td>
<td>org.apache.parquet.hadoop.api.WriteSupport</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.enable.dictionary</code></td>
<td>true</td><td></td><td>true</td><td>Y</td>
</tr>
<tr>
<td><code>parquet.dictionary.page.size</code></td>
<td>1m</td><td></td><td>1m</td><td></td>
</tr>
<tr>
<td><code>parquet.validation</code></td>
<td>false</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.writer.version</code></td>
<td>PARQUET_1_0</td><td></td><td>PARQUET_2_6</td><td>Y</td>
</tr>
<tr>
<td><code>parquet.memory.pool.ratio</code></td>
<td>0.95</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.memory.min.chunk.size</code></td>
<td>1m</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.writer.max-padding</code></td>
<td>8m</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.page.size.row.check.min</code></td>
<td>100</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.page.size.row.check.max</code></td>
<td>10000</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.page.value.count.threshold</code></td>
<td>Integer.MAX_VALUE / 2</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.page.size.check.estimate</code></td>
<td>true</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.columnindex.truncate.length</code></td>
<td>64</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.statistics.truncate.length</code></td>
<td>2147483647</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.bloom.filter.enabled</code></td>
<td>false</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.bloom.filter.adaptive.enabled</code></td>
<td>false</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.bloom.filter.candidates.number</code></td>
<td>5</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.bloom.filter.expected.ndv</code></td>
<td>None</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.bloom.filter.fpp</code></td>
<td>0.01</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.bloom.filter.max.bytes</code></td>
<td>1m</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.decrypt.off-heap.buffer.enabled</code></td>
<td>false</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.page.row.count.limit</code></td>
<td>20000</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.page.write-checksum.enabled</code></td>
<td>true</td><td></td><td>false</td><td></td>
</tr>
<tr>
<td><code>parquet.crypto.factory.class</code></td>
<td>None</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.compression.codec.zstd.bufferPool.enabled</code></td>
<td>true</td><td></td><td></td><td></td>
</tr>
<tr>
<td><code>parquet.compression.codec.zstd.level</code></td>
<td>3</td><td></td><td>0</td><td>Y</td>
</tr>
<tr>
<td><code>parquet.compression.codec.zstd.workers</code></td>
<td>0</td><td></td><td></td><td></td>
</tr>
</tbody>
</table>