blob: 76d39194bf0c85bbf2b5631c4feaa6523406feb7 [file] [log] [blame]
<!DOCTYPE html
SYSTEM "about:legacy-compat">
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2023"><meta name="DC.rights.owner" content="(C) Copyright 2023"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_query_options.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 3.4.x"><meta name="version" content="Impala 3.4.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="parquet_array_resolution"><link rel="stylesheet" type="text/css" href="../css/commonltr.css"><link rel="stylesheet" type="text/css" href="../css/dita-ot-doc.css"><title>PARQUET_ARRAY_RESOLUTION Query Option (Impala 2.9 or higher only)</title></head><body id="parquet_array_resolution"><header role="banner"><!--
The DITA Open Toolkit is licensed for use under the Apache
Software Foundation License v2.0.
A copy of the Apache Software Foundation License 2.0 is
available at http://opensource.org/licenses/apache2.0.php
This statement must be included in any copies of DITA Open
Toolkit code.
--><div class="header">
<p>Apache Impala</p>
<hr>
</div></header><nav role="toc"><ul><li><a href="../topics/impala_intro.html">Introducing Apache Impala</a></li><li><a href="../topics/impala_concepts.html">Concepts and Architecture</a></li><li><a href="../topics/impala_planning.html">Deployment Planning</a></li><li><a href="../topics/impala_install.html">Installing Impala</a></li><li><a href="../topics/impala_config.html">Managing Impala</a></li><li><a href="../topics/impala_upgrading.html">Upgrading Impala</a></li><li><a href="../topics/impala_processes.html">Starting Impala</a></li><li><a href="../topics/impala_tutorial.html">Tutorials</a></li><li><a href="../topics/impala_admin.html">Administration</a></li><li><a href="../topics/impala_security.html">Impala Security</a></li><li><a href="../topics/impala_langref.html">SQL Reference</a><ul><li><a href="../topics/impala_comments.html">Comments</a></li><li><a href="../topics/impala_datatypes.html">Data Types</a></li><li><a href="../topics/impala_literals.html">Literals</a></li><li><a href="../topics/impala_operators.html">SQL Operators</a></li><li><a href="../topics/impala_schema_objects.html">Schema Objects and Object Names</a></li><li><a href="../topics/impala_transactions.html">Transactions</a></li><li><a href="../topics/impala_langref_sql.html">SQL Statements</a><ul><li><a href="../topics/impala_ddl.html">DDL Statements</a></li><li><a href="../topics/impala_dml.html">DML Statements</a></li><li><a href="../topics/impala_alter_database.html">ALTER DATABASE</a></li><li><a href="../topics/impala_alter_table.html">ALTER TABLE</a></li><li><a href="../topics/impala_alter_view.html">ALTER VIEW</a></li><li><a href="../topics/impala_comment.html">COMMENT</a></li><li><a href="../topics/impala_compute_stats.html">COMPUTE STATS</a></li><li><a href="../topics/impala_create_database.html">CREATE DATABASE</a></li><li><a href="../topics/impala_create_function.html">CREATE FUNCTION</a></li><li><a href="../topics/impala_create_role.html">CREATE ROLE</a></li><li><a 
href="../topics/impala_create_table.html">CREATE TABLE</a></li><li><a href="../topics/impala_create_view.html">CREATE VIEW</a></li><li><a href="../topics/impala_delete.html">DELETE</a></li><li><a href="../topics/impala_describe.html">DESCRIBE</a></li><li><a href="../topics/impala_drop_database.html">DROP DATABASE</a></li><li><a href="../topics/impala_drop_function.html">DROP FUNCTION</a></li><li><a href="../topics/impala_drop_role.html">DROP ROLE</a></li><li><a href="../topics/impala_drop_stats.html">DROP STATS</a></li><li><a href="../topics/impala_drop_table.html">DROP TABLE</a></li><li><a href="../topics/impala_drop_view.html">DROP VIEW</a></li><li><a href="../topics/impala_explain.html">EXPLAIN</a></li><li><a href="../topics/impala_grant.html">GRANT</a></li><li><a href="../topics/impala_insert.html">INSERT</a></li><li><a href="../topics/impala_invalidate_metadata.html">INVALIDATE METADATA</a></li><li><a href="../topics/impala_load_data.html">LOAD DATA</a></li><li><a href="../topics/impala_refresh.html">REFRESH</a></li><li><a href="../topics/impala_refresh_authorization.html">REFRESH AUTHORIZATION</a></li><li><a href="../topics/impala_refresh_functions.html">REFRESH FUNCTIONS</a></li><li><a href="../topics/impala_revoke.html">REVOKE</a></li><li><a href="../topics/impala_select.html">SELECT</a></li><li><a href="../topics/impala_set.html">SET</a><ul><li><a href="../topics/impala_query_options.html">Query Options for the SET Statement</a><ul><li><a href="../topics/impala_abort_on_error.html">ABORT_ON_ERROR</a></li><li><a href="../topics/impala_allow_erasure_coded_files.html">ALLOW_ERASURE_CODED_FILES</a></li><li><a href="../topics/impala_allow_unsupported_formats.html">ALLOW_UNSUPPORTED_FORMATS</a></li><li><a href="../topics/impala_appx_count_distinct.html">APPX_COUNT_DISTINCT</a></li><li><a href="../topics/impala_batch_size.html">BATCH_SIZE</a></li><li><a href="../topics/impala_broadcast_bytes_limit.html">BROADCAST_BYTES_LIMIT</a></li><li><a 
href="../topics/impala_buffer_pool_limit.html">BUFFER_POOL_LIMIT</a></li><li><a href="../topics/impala_compression_codec.html">COMPRESSION_CODEC</a></li><li><a href="../topics/impala_compute_stats_min_sample_size.html">COMPUTE_STATS_MIN_SAMPLE_SIZE</a></li><li><a href="../topics/impala_debug_action.html">DEBUG_ACTION</a></li><li><a href="../topics/impala_decimal_v2.html">DECIMAL_V2</a></li><li><a href="../topics/impala_default_file_format.html">DEFAULT_FILE_FORMAT</a></li><li><a href="../topics/impala_default_hints_insert_statement.html">DEFAULT_HINTS_INSERT_STATEMENT</a></li><li><a href="../topics/impala_default_join_distribution_mode.html">DEFAULT_JOIN_DISTRIBUTION_MODE</a></li><li><a href="../topics/impala_default_spillable_buffer_size.html">DEFAULT_SPILLABLE_BUFFER_SIZE</a></li><li><a href="../topics/impala_default_transactional_type.html">DEFAULT_TRANSACTIONAL_TYPE</a></li><li><a href="../topics/impala_delete_stats_in_truncate.html">DELETE_STATS_IN_TRUNCATE</a></li><li><a href="../topics/impala_disable_codegen.html">DISABLE_CODEGEN</a></li><li><a href="../topics/impala_disable_codegen_rows_threshold.html">DISABLE_CODEGEN_ROWS_THRESHOLD</a></li><li><a href="../topics/impala_disable_hbase_num_rows_estimate.html">DISABLE_HBASE_NUM_ROWS_ESTIMATE</a></li><li><a href="../topics/impala_disable_row_runtime_filtering.html">DISABLE_ROW_RUNTIME_FILTERING</a></li><li><a href="../topics/impala_disable_streaming_preaggregations.html">DISABLE_STREAMING_PREAGGREGATIONS</a></li><li><a href="../topics/impala_disable_unsafe_spills.html">DISABLE_UNSAFE_SPILLS</a></li><li><a href="../topics/impala_enable_expr_rewrites.html">ENABLE_EXPR_REWRITES</a></li><li><a href="../topics/impala_exec_single_node_rows_threshold.html">EXEC_SINGLE_NODE_ROWS_THRESHOLD</a></li><li><a href="../topics/impala_exec_time_limit_s.html">EXEC_TIME_LIMIT_S</a></li><li><a href="../topics/impala_explain_level.html">EXPLAIN_LEVEL</a></li><li><a 
href="../topics/impala_fetch_rows_timeout_ms.html">FETCH_ROWS_TIMEOUT_MS</a></li><li><a href="../topics/impala_join_rows_produced_limit.html">JOIN_ROWS_PRODUCED_LIMIT</a></li><li><a href="../topics/impala_hbase_cache_blocks.html">HBASE_CACHE_BLOCKS</a></li><li><a href="../topics/impala_hbase_caching.html">HBASE_CACHING</a></li><li><a href="../topics/impala_idle_session_timeout.html">IDLE_SESSION_TIMEOUT</a></li><li><a href="../topics/impala_kudu_read_mode.html">KUDU_READ_MODE</a></li><li><a href="../topics/impala_live_progress.html">LIVE_PROGRESS</a></li><li><a href="../topics/impala_live_summary.html">LIVE_SUMMARY</a></li><li><a href="../topics/impala_max_errors.html">MAX_ERRORS</a></li><li><a href="../topics/impala_max_mem_estimate_for_admission.html">MAX_MEM_ESTIMATE_FOR_ADMISSION</a></li><li><a href="../topics/impala_max_result_spooling_mem.html">MAX_RESULT_SPOOLING_MEM</a></li><li><a href="../topics/impala_max_row_size.html">MAX_ROW_SIZE</a></li><li><a href="../topics/impala_max_scan_range_length.html">MAX_SCAN_RANGE_LENGTH</a></li><li><a href="../topics/impala_max_spilled_result_spooling_mem.html">MAX_SPILLED_RESULT_SPOOLING_MEM</a></li><li><a href="../topics/impala_mem_limit.html">MEM_LIMIT</a></li><li><a href="../topics/impala_min_spillable_buffer_size.html">MIN_SPILLABLE_BUFFER_SIZE</a></li><li><a href="../topics/impala_mt_dop.html">MT_DOP</a></li><li><a href="../topics/impala_num_nodes.html">NUM_NODES</a></li><li><a href="../topics/impala_num_rows_produced_limit.html">NUM_ROWS_PRODUCED_LIMIT</a></li><li><a href="../topics/impala_num_scanner_threads.html">NUM_SCANNER_THREADS</a></li><li><a href="../topics/impala_optimize_partition_key_scans.html">OPTIMIZE_PARTITION_KEY_SCANS</a></li><li><a href="../topics/impala_parquet_compression_codec.html">PARQUET_COMPRESSION_CODEC</a></li><li><a href="../topics/impala_parquet_annotate_strings_utf8.html">PARQUET_ANNOTATE_STRINGS_UTF8</a></li><li class="active"><a 
href="../topics/impala_parquet_array_resolution.html">PARQUET_ARRAY_RESOLUTION</a></li><li><a href="../topics/impala_parquet_dictionary_filtering.html">PARQUET_DICTIONARY_FILTERING</a></li><li><a href="../topics/impala_parquet_fallback_schema_resolution.html">PARQUET_FALLBACK_SCHEMA_RESOLUTION</a></li><li><a href="../topics/impala_parquet_file_size.html">PARQUET_FILE_SIZE</a></li><li><a href="../topics/impala_parquet_object_store_split_size.html">PARQUET_OBJECT_STORE_SPLIT_SIZE</a></li><li><a href="../topics/impala_parquet_page_row_count_limit.html">PARQUET_PAGE_ROW_COUNT_LIMIT</a></li><li><a href="../topics/impala_parquet_read_statistics.html">PARQUET_READ_STATISTICS</a></li><li><a href="../topics/impala_parquet_read_page_index.html">PARQUET_READ_PAGE_INDEX</a></li><li><a href="../topics/impala_parquet_write_page_index.html">PARQUET_WRITE_PAGE_INDEX</a></li><li><a href="../topics/impala_prefetch_mode.html">PREFETCH_MODE</a></li><li><a href="../topics/impala_query_timeout_s.html">QUERY_TIMEOUT_S</a></li><li><a href="../topics/impala_refresh_updated_hms.html">REFRESH_UPDATED_HMS_PARTITIONS</a></li><li><a href="../topics/impala_replica_preference.html">REPLICA_PREFERENCE</a></li><li><a href="../topics/impala_request_pool.html">REQUEST_POOL</a></li><li><a href="../topics/impala_resource_trace_ratio.html">RESOURCE_TRACE_RATIO</a></li><li><a href="../topics/impala_retry_failed_queries.html">RETRY_FAILED_QUERIES</a></li><li><a href="../topics/impala_runtime_bloom_filter_size.html">RUNTIME_BLOOM_FILTER_SIZE</a></li><li><a href="../topics/impala_runtime_filter_max_size.html">RUNTIME_FILTER_MAX_SIZE</a></li><li><a href="../topics/impala_runtime_filter_min_size.html">RUNTIME_FILTER_MIN_SIZE</a></li><li><a href="../topics/impala_runtime_filter_mode.html">RUNTIME_FILTER_MODE</a></li><li><a href="../topics/impala_runtime_filter_wait_time_ms.html">RUNTIME_FILTER_WAIT_TIME_MS</a></li><li><a href="../topics/impala_s3_skip_insert_staging.html">S3_SKIP_INSERT_STAGING</a></li><li><a 
href="../topics/impala_scan_bytes_limit.html">SCAN_BYTES_LIMIT</a></li><li><a href="../topics/impala_schedule_random_replica.html">SCHEDULE_RANDOM_REPLICA</a></li><li><a href="../topics/impala_scratch_limit.html">SCRATCH_LIMIT</a></li><li><a href="../topics/impala_shuffle_distinct_exprs.html">SHUFFLE_DISTINCT_EXPRS</a></li><li><a href="../topics/impala_spool_query_results.html">SPOOL_QUERY_RESULTS</a></li><li><a href="../topics/impala_support_start_over.html">SUPPORT_START_OVER</a></li><li><a href="../topics/impala_sync_ddl.html">SYNC_DDL</a></li><li><a href="../topics/impala_thread_reservation_aggregate_limit.html">THREAD_RESERVATION_AGGREGATE_LIMIT</a></li><li><a href="../topics/impala_thread_reservation_limit.html">THREAD_RESERVATION_LIMIT</a></li><li><a href="../topics/impala_timezone.html">TIMEZONE</a></li><li><a href="../topics/impala_topn_bytes_limit.html">TOPN_BYTES_LIMIT</a></li><li><a href="../topics/impala_utf8_mode.html">UTF8_MODE</a></li><li><a href="../topics/impala_expand_complex_types.html">EXPAND_COMPLEX_TYPES</a></li></ul></li></ul></li><li><a href="../topics/impala_show.html">SHOW</a></li><li><a href="../topics/impala_shutdown.html">SHUTDOWN</a></li><li><a href="../topics/impala_truncate_table.html">TRUNCATE TABLE</a></li><li><a href="../topics/impala_update.html">UPDATE</a></li><li><a href="../topics/impala_upsert.html">UPSERT</a></li><li><a href="../topics/impala_use.html">USE</a></li><li><a href="../topics/impala_values.html">VALUES</a></li><li><a href="../topics/impala_hints.html">Optimizer Hints</a></li></ul></li><li><a href="../topics/impala_functions.html">Built-In Functions</a></li><li><a href="../topics/impala_udf.html">User-Defined Functions (UDFs)</a></li><li><a href="../topics/impala_langref_unsupported.html">SQL Differences Between Impala and Hive</a></li><li><a href="../topics/impala_porting.html">Porting SQL</a></li><li><a href="../topics/impala_utf_8.html">UTF-8 Support</a></li></ul></li><li><a 
href="../topics/impala_performance.html">Performance Tuning</a></li><li><a href="../topics/impala_scalability.html">Scalability Considerations</a></li><li><a href="../topics/impala_resource_management.html">Resource Management</a></li><li><a href="../topics/impala_partitioning.html">Partitioning</a></li><li><a href="../topics/impala_file_formats.html">File Formats</a></li><li><a href="../topics/impala_kudu.html">Using Impala to Query Kudu Tables</a></li><li><a href="../topics/impala_hbase.html">HBase Tables</a></li><li><a href="../topics/impala_iceberg.html">Iceberg Tables</a></li><li><a href="../topics/impala_s3.html">S3 Tables</a></li><li><a href="../topics/impala_adls.html">ADLS Tables</a></li><li><a href="../topics/impala_isilon.html">Isilon Storage</a></li><li><a href="../topics/impala_ozone.html">Ozone Storage</a></li><li><a href="../topics/impala_logging.html">Logging</a></li><li><a href="../topics/impala_client.html">Client Access</a></li><li><a href="../topics/impala_fault_tolerance.html">Fault Tolerance</a></li><li><a href="../topics/impala_troubleshooting.html">Troubleshooting Impala</a></li><li><a href="../topics/impala_ports.html">Ports Used by Impala</a></li><li><a href="../topics/impala_reserved_words.html">Impala Reserved Words</a></li><li><a href="../topics/impala_faq.html">Impala Frequently Asked Questions</a></li><li><a href="../topics/impala_release_notes.html">Impala Release Notes</a></li></ul></nav><main role="main"><article role="article" aria-labelledby="ariaid-title1">
<h1 class="title topictitle1" id="ariaid-title1">
PARQUET_ARRAY_RESOLUTION Query Option (<span class="keyword">Impala 2.9</span> or higher only)
</h1>
<div class="body conbody">
<p class="p">
The <code class="ph codeph">PARQUET_ARRAY_RESOLUTION</code> query option controls the
behavior of the index-based resolution for nested arrays in Parquet.
</p>
<p class="p">
In Parquet, you can represent an array using a 2-level or 3-level
representation. The modern, standard representation is 3-level. The legacy
2-level scheme is supported for compatibility with older Parquet files.
However, there is no reliable metadata within Parquet files to indicate
which encoding was used. It is even possible to have mixed encodings within
the same file if there are multiple arrays. The
<code class="ph codeph">PARQUET_ARRAY_RESOLUTION</code> option controls the process of
resolution, which matches every column/field reference in a query to a
column in the Parquet file.</p>
<p class="p">
The supported values for the query option are:
</p>
<ul class="ul">
<li class="li">
<code class="ph codeph">THREE_LEVEL</code>: Assumes arrays are encoded with the 3-level
representation, and does not attempt the 2-level resolution.
</li>
<li class="li">
<code class="ph codeph">TWO_LEVEL</code>: Assumes arrays are encoded with the 2-level
representation, and does not attempt the 3-level resolution.
</li>
<li class="li">
<code class="ph codeph">TWO_LEVEL_THEN_THREE_LEVEL</code>: First tries to resolve
assuming a 2-level representation, and if unsuccessful, tries a 3-level
representation.
</li>
</ul>
<p class="p">
All of the above options resolve arrays encoded with a single level.
</p>
<p class="p">
A failure to resolve a column/field reference in a query with a given array
resolution policy does not necessarily result in a warning or error returned
by the query. A mismatch might be treated like a missing column (returns
NULL values), and it is not possible to reliably distinguish the 'bad
resolution' and 'legitimately missing column' cases.
</p>
<p class="p">
The name-based policy generally does not have the problem of ambiguous
array representations. To use the name-based policy, set
the <code class="ph codeph">PARQUET_FALLBACK_SCHEMA_RESOLUTION</code> query option to
<code class="ph codeph">NAME</code>.
</p>
<p class="p">
<strong class="ph b">Type:</strong> Enum of <code class="ph codeph">TWO_LEVEL</code>,
<code class="ph codeph">TWO_LEVEL_THEN_THREE_LEVEL</code>, and
<code class="ph codeph">THREE_LEVEL</code>
</p>
<p class="p">
<strong class="ph b">Default:</strong> <code class="ph codeph">THREE_LEVEL</code>
</p>
<p class="p">
<strong class="ph b">Added in:</strong> <span class="keyword">Impala 2.9.0</span>
</p>
<p class="p">
<strong class="ph b">Examples:</strong>
</p>
<p class="p">
EXAMPLE A: The following Parquet schema of a file can be interpreted as either a
2-level or a 3-level representation:
</p>
<pre class="pre codeblock"><code>
ParquetSchemaExampleA {
optional group single_element_groups (LIST) {
repeated group single_element_group {
required int64 count;
}
}
}
</code></pre>
<p class="p">
The following table schema corresponds to a 2-level interpretation:
</p>
<pre class="pre codeblock"><code>
CREATE TABLE t (col1 array&lt;struct&lt;f1: bigint&gt;&gt;) STORED AS PARQUET;
</code></pre>
<p class="p">
Successful query with a 2-level interpretation:
</p>
<pre class="pre codeblock"><code>
SET PARQUET_ARRAY_RESOLUTION=TWO_LEVEL;
SELECT ITEM.f1 FROM t.col1;
</code></pre>
<p class="p">
The following table schema corresponds to a 3-level interpretation:
</p>
<pre class="pre codeblock"><code>
CREATE TABLE t (col1 array&lt;bigint&gt;) STORED AS PARQUET;
</code></pre>
<p class="p">
Successful query with a 3-level interpretation:
</p>
<pre class="pre codeblock"><code>
SET PARQUET_ARRAY_RESOLUTION=THREE_LEVEL;
SELECT ITEM FROM t.col1;
</code></pre>
<p class="p">
EXAMPLE B: The following Parquet schema of a file can only be successfully
interpreted as a 2-level representation:
</p>
<pre class="pre codeblock"><code>
ParquetSchemaExampleB {
required group list_of_ints (LIST) {
repeated int32 list_of_ints_tuple;
}
}
</code></pre>
<p class="p">
The following table schema corresponds to a 2-level interpretation:
</p>
<pre class="pre codeblock"><code>
CREATE TABLE t (col1 array&lt;int&gt;) STORED AS PARQUET;
</code></pre>
<p class="p">
Successful query with a 2-level interpretation:
</p>
<pre class="pre codeblock"><code>
SET PARQUET_ARRAY_RESOLUTION=TWO_LEVEL;
SELECT ITEM FROM t.col1;
</code></pre>
<p class="p">
Unsuccessful query with a 3-level interpretation. The query returns
<code class="ph codeph">NULL</code>s as if the column were missing from the file:
</p>
<pre class="pre codeblock"><code>
SET PARQUET_ARRAY_RESOLUTION=THREE_LEVEL;
SELECT ITEM FROM t.col1;
</code></pre>
</div>
<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_query_options.html">Query Options for the SET Statement</a></div></div></nav></article></main></body></html>