blob: 57f3ca924f3417be0f0772b484a11fda288eec74 [file] [log] [blame]
<!DOCTYPE html
SYSTEM "about:legacy-compat">
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2023"><meta name="DC.rights.owner" content="(C) Copyright 2023"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_set.html"><meta name="prodname" content="Impala"><meta name="version" content="Impala 3.4.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="parquet_dictionary_filtering"><link rel="stylesheet" type="text/css" href="../css/commonltr.css"><link rel="stylesheet" type="text/css" href="../css/dita-ot-doc.css"><title>PARQUET_DICTIONARY_FILTERING Query Option (Impala 2.9 or higher only)</title></head><body id="parquet_dictionary_filtering"><header role="banner"><!--
The DITA Open Toolkit is licensed for use under the the Apache
Software Foundation License v2.0.
A copy of the Apache Software Foundation License 2.0 is
available at http://opensource.org/licenses/apache2.0.php
This statement must be included in any copies of DITA Open
Toolkit code.
--><div class="header">
<p>Apache Impala</p>
<hr>
</div></header><nav role="toc"><ul><li><a href="../topics/impala_intro.html">Introducing Apache Impala</a></li><li><a href="../topics/impala_concepts.html">Concepts and Architecture</a></li><li><a href="../topics/impala_planning.html">Deployment Planning</a></li><li><a href="../topics/impala_install.html">Installing Impala</a></li><li><a href="../topics/impala_config.html">Managing Impala</a></li><li><a href="../topics/impala_upgrading.html">Upgrading Impala</a></li><li><a href="../topics/impala_processes.html">Starting Impala</a></li><li><a href="../topics/impala_tutorial.html">Tutorials</a></li><li><a href="../topics/impala_admin.html">Administration</a></li><li><a href="../topics/impala_security.html">Impala Security</a></li><li><a href="../topics/impala_langref.html">SQL Reference</a><ul><li><a href="../topics/impala_comments.html">Comments</a></li><li><a href="../topics/impala_datatypes.html">Data Types</a></li><li><a href="../topics/impala_literals.html">Literals</a></li><li><a href="../topics/impala_operators.html">SQL Operators</a></li><li><a href="../topics/impala_schema_objects.html">Schema Objects and Object Names</a></li><li><a href="../topics/impala_transactions.html">Transactions</a></li><li><a href="../topics/impala_langref_sql.html">SQL Statements</a><ul><li><a href="../topics/impala_ddl.html">DDL Statements</a></li><li><a href="../topics/impala_dml.html">DML Statements</a></li><li><a href="../topics/impala_alter_database.html">ALTER DATABASE</a></li><li><a href="../topics/impala_alter_table.html">ALTER TABLE</a></li><li><a href="../topics/impala_alter_view.html">ALTER VIEW</a></li><li><a href="../topics/impala_comment.html">COMMENT</a></li><li><a href="../topics/impala_compute_stats.html">COMPUTE STATS</a></li><li><a href="../topics/impala_create_database.html">CREATE DATABASE</a></li><li><a href="../topics/impala_create_function.html">CREATE FUNCTION</a></li><li><a href="../topics/impala_create_role.html">CREATE ROLE</a></li><li><a href="../topics/impala_create_table.html">CREATE TABLE</a></li><li><a href="../topics/impala_create_view.html">CREATE VIEW</a></li><li><a href="../topics/impala_delete.html">DELETE</a></li><li><a href="../topics/impala_describe.html">DESCRIBE</a></li><li><a href="../topics/impala_drop_database.html">DROP DATABASE</a></li><li><a href="../topics/impala_drop_function.html">DROP FUNCTION</a></li><li><a href="../topics/impala_drop_role.html">DROP ROLE</a></li><li><a href="../topics/impala_drop_stats.html">DROP STATS</a></li><li><a href="../topics/impala_drop_table.html">DROP TABLE</a></li><li><a href="../topics/impala_drop_view.html">DROP VIEW</a></li><li><a href="../topics/impala_explain.html">EXPLAIN</a></li><li><a href="../topics/impala_grant.html">GRANT</a></li><li><a href="../topics/impala_insert.html">INSERT</a></li><li><a href="../topics/impala_invalidate_metadata.html">INVALIDATE METADATA</a></li><li><a href="../topics/impala_load_data.html">LOAD DATA</a></li><li><a href="../topics/impala_refresh.html">REFRESH</a></li><li><a href="../topics/impala_refresh_authorization.html">REFRESH AUTHORIZATION</a></li><li><a href="../topics/impala_refresh_functions.html">REFRESH FUNCTIONS</a></li><li><a href="../topics/impala_revoke.html">REVOKE</a></li><li><a href="../topics/impala_select.html">SELECT</a></li><li><a href="../topics/impala_set.html">SET</a><ul><li><a href="../topics/impala_abort_on_error.html">ABORT ON ERROR</a></li><li><a href="../topics/impala_allow_erasure_coded_files.html">ALLOW ERASURE CODED FILES</a></li><li><a href="../topics/impala_allow_unsupported_formats.html">ALLOW UNSUPPORTED FORMATS</a></li><li><a href="../topics/impala_appx_count_distinct.html">APPX COUNT DISTINCT</a></li><li><a href="../topics/impala_batch_size.html">BATCH SIZE</a></li><li><a href="../topics/impala_broadcast_bytes_limit.html">BROADCAST BYTES LIMIT</a></li><li><a href="../topics/impala_buffer_pool_limit.html">BUFFER POOL LIMIT</a></li><li><a href="../topics/impala_compression_codec.html">COMPRESSION CODEC</a></li><li><a href="../topics/impala_compute_stats_min_sample_size.html">COMPUTE STATS MIN SAMPLE SIZE</a></li><li><a href="../topics/impala_debug_action.html">DEBUG ACTION</a></li><li><a href="../topics/impala_decimal_v2.html">DECIMAL V2</a></li><li><a href="../topics/impala_default_file_format.html">DEFAULT FILE FORMAT</a></li><li><a href="../topics/impala_default_hints_insert_statement.html">DEFAULT HINTS INSERT STATEMENT</a></li><li><a href="../topics/impala_default_join_distribution_mode.html">DEFAULT JOIN DISTRIBUTION MODE</a></li><li><a href="../topics/impala_default_spillable_buffer_size.html">DEFAULT SPILLABLE BUFFER SIZE</a></li><li><a href="../topics/impala_default_transactional_type.html">DEFAULT TRANSACTIONAL TYPE</a></li><li><a href="../topics/impala_delete_stats_in_truncate.html">DELETE STATS IN TRUNCATE</a></li><li><a href="../topics/impala_disable_codegen.html">DISABLE CODEGEN</a></li><li><a href="../topics/impala_disable_codegen_rows_threshold.html">DISABLE CODEGEN ROWS THRESHOLD</a></li><li><a href="../topics/impala_disable_hbase_num_rows_estimate.html">DISABLE HBASE NUM ROWS ESTIMATE</a></li><li><a href="../topics/impala_disable_row_runtime_filtering.html">DISABLE ROW RUNTIME FILTERING</a></li><li><a href="../topics/impala_disable_streaming_preaggregations.html">DISABLE STREAMING PREAGGREGATIONS</a></li><li><a href="../topics/impala_disable_unsafe_spills.html">DISABLE UNSAFE SPILLS</a></li><li><a href="../topics/impala_enable_expr_rewrites.html">ENABLE EXPR REWRITES</a></li><li><a href="../topics/impala_exec_single_node_rows_threshold.html">EXEC SINGLE NODE ROWS THRESHOLD</a></li><li><a href="../topics/impala_exec_time_limit_s.html">EXEC TIME LIMIT S</a></li><li><a href="../topics/impala_explain_level.html">EXPLAIN LEVEL</a></li><li><a href="../topics/impala_fetch_rows_timeout_ms.html">FETCH ROWS TIMEOUT MS</a></li><li><a href="../topics/impala_join_rows_produced_limit.html">JOIN ROWS PRODUCED LIMIT</a></li><li><a href="../topics/impala_hbase_cache_blocks.html">HBASE CACHE BLOCKS</a></li><li><a href="../topics/impala_hbase_caching.html">HBASE CACHING</a></li><li><a href="../topics/impala_idle_session_timeout.html">IDLE SESSION TIMEOUT</a></li><li><a href="../topics/impala_kudu_read_mode.html">KUDU READ MODE</a></li><li><a href="../topics/impala_live_progress.html">LIVE PROGRESS</a></li><li><a href="../topics/impala_live_summary.html">LIVE SUMMARY</a></li><li><a href="../topics/impala_max_errors.html">MAX ERRORS</a></li><li><a href="../topics/impala_max_mem_estimate_for_admission.html">MAX MEM ESTIMATE FOR ADMISSION</a></li><li><a href="../topics/impala_max_result_spooling_mem.html">MAX RESULT SPOOLING MEM</a></li><li><a href="../topics/impala_max_row_size.html">MAX ROW SIZE</a></li><li><a href="../topics/impala_max_scan_range_length.html">MAX SCAN RANGE LENGTH</a></li><li><a href="../topics/impala_max_spilled_result_spooling_mem.html">MAX SPILLED RESULT SPOOLING MEM</a></li><li><a href="../topics/impala_mem_limit.html">MEM LIMIT</a></li><li><a href="../topics/impala_min_spillable_buffer_size.html">MIN SPILLABLE BUFFER SIZE</a></li><li><a href="../topics/impala_mt_dop.html">MT DOP</a></li><li><a href="../topics/impala_num_nodes.html">NUM NODES</a></li><li><a href="../topics/impala_num_rows_produced_limit.html">NUM ROWS PRODUCED LIMIT</a></li><li><a href="../topics/impala_num_scanner_threads.html">NUM SCANNER THREADS</a></li><li><a href="../topics/impala_optimize_partition_key_scans.html">OPTIMIZE PARTITION KEY SCANS</a></li><li><a href="../topics/impala_parquet_compression_codec.html">PARQUET COMPRESSION CODEC</a></li><li><a href="../topics/impala_parquet_annotate_strings_utf8.html">PARQUET ANNOTATE STRINGS UTF8</a></li><li><a href="../topics/impala_parquet_array_resolution.html">PARQUET ARRAY RESOLUTION</a></li><li class="active"><a href="../topics/impala_parquet_dictionary_filtering.html">PARQUET DICTIONARY FILTERING</a></li><li><a href="../topics/impala_parquet_fallback_schema_resolution.html">PARQUET FALLBACK SCHEMA RESOLUTION</a></li><li><a href="../topics/impala_parquet_file_size.html">PARQUET FILE SIZE</a></li><li><a href="../topics/impala_parquet_object_store_split_size.html">PARQUET OBJECT STORE SPLIT SIZE</a></li><li><a href="../topics/impala_parquet_page_row_count_limit.html">PARQUET PAGE ROW COUNT LIMIT</a></li><li><a href="../topics/impala_parquet_read_statistics.html">PARQUET READ STATISTICS</a></li><li><a href="../topics/impala_parquet_read_page_index.html">PARQUET READ PAGE INDEX</a></li><li><a href="../topics/impala_parquet_write_page_index.html">PARQUET WRITE PAGE INDEX</a></li><li><a href="../topics/impala_prefetch_mode.html">PREFETCH MODE</a></li><li><a href="../topics/impala_query_timeout_s.html">QUERY TIMEOUT S</a></li><li><a href="../topics/impala_refresh_updated_hms.html">REFRESH UPDATED HMS PARTITIONS</a></li><li><a href="../topics/impala_replica_preference.html">REPLICA PREFERENCE</a></li><li><a href="../topics/impala_request_pool.html">REQUEST POOL</a></li><li><a href="../topics/impala_resource_trace_ratio.html">RESOURCE TRACE RATIO</a></li><li><a href="../topics/impala_retry_failed_queries.html">RETRY FAILED QUERIES</a></li><li><a href="../topics/impala_runtime_bloom_filter_size.html">RUNTIME BLOOM FILTER SIZE</a></li><li><a href="../topics/impala_runtime_filter_max_size.html">RUNTIME FILTER MAX SIZE</a></li><li><a href="../topics/impala_runtime_filter_min_size.html">RUNTIME FILTER MIN SIZE</a></li><li><a href="../topics/impala_runtime_filter_mode.html">RUNTIME FILTER MODE</a></li><li><a href="../topics/impala_runtime_filter_wait_time_ms.html">RUNTIME FILTER WAIT TIME MS</a></li><li><a href="../topics/impala_s3_skip_insert_staging.html">S3 SKIP INSERT STAGING</a></li><li><a href="../topics/impala_scan_bytes_limit.html">SCAN BYTES LIMIT</a></li><li><a href="../topics/impala_schedule_random_replica.html">SCHEDULE RANDOM REPLICA</a></li><li><a href="../topics/impala_scratch_limit.html">SCRATCH LIMIT</a></li><li><a href="../topics/impala_shuffle_distinct_exprs.html">SHUFFLE DISTINCT EXPRS</a></li><li><a href="../topics/impala_spool_query_results.html">SPOOL QUERY RESULTS</a></li><li><a href="../topics/impala_support_start_over.html">SUPPORT START OVER</a></li><li><a href="../topics/impala_sync_ddl.html">SYNC DDL</a></li><li><a href="../topics/impala_thread_reservation_aggregate_limit.html">THREAD RESERVATION AGGREGATE LIMIT</a></li><li><a href="../topics/impala_thread_reservation_limit.html">THREAD RESERVATION LIMIT</a></li><li><a href="../topics/impala_timezone.html">TIMEZONE</a></li><li><a href="../topics/impala_topn_bytes_limit.html">TOPN BYTES LIMIT</a></li><li><a href="../topics/impala_utf8_mode.html">UTF8 MODE</a></li><li><a href="../topics/impala_expand_complex_types.html">EXPAND COMPLEX TYPES</a></li></ul></li><li><a href="../topics/impala_show.html">SHOW</a></li><li><a href="../topics/impala_shutdown.html">SHUTDOWN</a></li><li><a href="../topics/impala_truncate_table.html">TRUNCATE TABLE</a></li><li><a href="../topics/impala_update.html">UPDATE</a></li><li><a href="../topics/impala_upsert.html">UPSERT</a></li><li><a href="../topics/impala_use.html">USE</a></li><li><a href="../topics/impala_values.html">VALUES</a></li><li><a href="../topics/impala_hints.html">Optimizer Hints</a></li></ul></li><li><a href="../topics/impala_functions.html">Built-In Functions</a></li><li><a href="../topics/impala_udf.html">User-Defined Functions (UDFs)</a></li><li><a href="../topics/impala_langref_unsupported.html">SQL Differences Between Impala and Hive</a></li><li><a href="../topics/impala_porting.html">Porting SQL</a></li><li><a href="../topics/impala_utf_8.html">UTF-8 Support</a></li></ul></li><li><a href="../topics/impala_performance.html">Performance Tuning</a></li><li><a href="../topics/impala_scalability.html">Scalability Considerations</a></li><li><a href="../topics/impala_resource_management.html">Resource Management</a></li><li><a href="../topics/impala_partitioning.html">Partitioning</a></li><li><a href="../topics/impala_file_formats.html">File Formats</a></li><li><a href="../topics/impala_kudu.html">Using Impala to Query Kudu Tables</a></li><li><a href="../topics/impala_hbase.html">HBase Tables</a></li><li><a href="../topics/impala_iceberg.html">Iceberg Tables</a></li><li><a href="../topics/impala_s3.html">S3 Tables</a></li><li><a href="../topics/impala_adls.html">ADLS Tables</a></li><li><a href="../topics/impala_isilon.html">Isilon Storage</a></li><li><a href="../topics/impala_ozone.html">Ozone Storage</a></li><li><a href="../topics/impala_logging.html">Logging</a></li><li><a href="../topics/impala_client.html">Client Access</a></li><li><a href="../topics/impala_fault_tolerance.html">Fault Tolerance</a></li><li><a href="../topics/impala_troubleshooting.html">Troubleshooting Impala</a></li><li><a href="../topics/impala_ports.html">Ports Used by Impala</a></li><li><a href="../topics/impala_reserved_words.html">Impala Reserved Words</a></li><li><a href="../topics/impala_faq.html">Impala Frequently Asked Questions</a></li><li><a href="../topics/impala_release_notes.html">Impala Release Notes</a></li></ul></nav><main role="main"><article role="article" aria-labelledby="ariaid-title1">
<h1 class="title topictitle1" id="ariaid-title1">PARQUET_DICTIONARY_FILTERING Query Option (<span class="keyword">Impala 2.9</span> or higher only)</h1>
<div class="body conbody">
<p class="p">
The <code class="ph codeph">PARQUET_DICTIONARY_FILTERING</code> query option controls whether Impala
uses dictionary filtering for Parquet files.
</p>
<p class="p">
To efficiently process a highly selective scan query, when this option is enabled, Impala
checks the values in the Parquet dictionary page and determines if the whole row group can
be thrown out.
</p>
<div class="p">
A column chunk is purely dictionary encoded and can be used by dictionary filtering if any
of the following conditions are met:
<ol class="ol">
<li class="li">
If the <code class="ph codeph">encoding_stats</code> is in the Parquet file, dictionary filtering
uses it to determine if there are only dictionary encoded pages (i.e. there are no
data pages with an encoding other than <code class="ph codeph">RLE_DICTIONARY</code> or
<code class="ph codeph">PLAIN_DICTIONARY</code>).
</li>
<li class="li">
If the encoding stats are not present, dictionary filtering looks at the encodings.
The column is purely dictionary encoded if both of the conditions satisfy:
<ul class="ul">
<li class="li">
<code class="ph codeph">PLAIN_DICTIONARY</code> or <code class="ph codeph">RLE_DICTIONARY</code> is present.
</li>
<li class="li">
Only <code class="ph codeph">PLAIN_DICTIONARY</code>, <code class="ph codeph">RLE_DICTIONARY</code>,
<code class="ph codeph">RLE</code>, or <code class="ph codeph">BIT_PACKED</code> encodings are listed.
</li>
</ul>
</li>
<li class="li">
Dictionary filtering works for the Parquet dictionaries with less than 40000 values if
the file was written by <span class="keyword"> or lower</span>.
</li>
</ol>
</div>
<p class="p">
In the query runtime profile output for each Impalad instance, the
<code class="ph codeph">NumDictFilteredRowGroups</code> field in the SCAN node section shows the number
of row groups that were skipped based on dictionary filtering.
</p>
<p class="p">
Note that row groups can be filtered out by Parquet statistics, and in such cases,
dictionary filtering will not be considered.
</p>
<div class="p">
The supported values for the query option are:
<ul class="ul">
<li class="li">
<code class="ph codeph">true</code> (<code class="ph codeph">1</code>): Use dictionary filtering.
</li>
<li class="li">
<code class="ph codeph">false</code> (<code class="ph codeph">0</code>): Do not use dictionary filtering
</li>
<li class="li">
Any other values are treated as <code class="ph codeph">false</code>.
</li>
</ul>
</div>
<p class="p">
<strong class="ph b">Type:</strong> Boolean
</p>
<p class="p">
<strong class="ph b">Default:</strong> <code class="ph codeph">true</code> (<code class="ph codeph">1</code>)
</p>
<p class="p">
<strong class="ph b">Added in:</strong> <span class="keyword">Impala 2.9.0</span>
</p>
</div>
<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_set.html">SET Statement</a></div></div></nav></article></main></body></html>