blob: 1bb3357f8d52086febb2bedc3ffa9002a0ac67c7 [file] [log] [blame]
<!DOCTYPE html
SYSTEM "about:legacy-compat">
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2023"><meta name="DC.rights.owner" content="(C) Copyright 2023"><meta name="DC.Type" content="concept"><meta name="DC.Relation" scheme="URI" content="../topics/impala_set.html"><meta name="prodname" content="Impala"><meta name="prodname" content="Impala"><meta name="version" content="Impala 3.4.x"><meta name="version" content="Impala 3.4.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="default_join_distribution_mode"><link rel="stylesheet" type="text/css" href="../css/commonltr.css"><link rel="stylesheet" type="text/css" href="../css/dita-ot-doc.css"><title>DEFAULT_JOIN_DISTRIBUTION_MODE Query Option</title></head><body id="default_join_distribution_mode"><header role="banner"><!--
The DITA Open Toolkit is licensed for use under the the Apache
Software Foundation License v2.0.
A copy of the Apache Software Foundation License 2.0 is
available at http://opensource.org/licenses/apache2.0.php
This statement must be included in any copies of DITA Open
Toolkit code.
--><div class="header">
<p>Apache Impala</p>
<hr>
</div></header><nav role="toc"><ul><li><a href="../topics/impala_intro.html">Introducing Apache Impala</a></li><li><a href="../topics/impala_concepts.html">Concepts and Architecture</a></li><li><a href="../topics/impala_planning.html">Deployment Planning</a></li><li><a href="../topics/impala_install.html">Installing Impala</a></li><li><a href="../topics/impala_config.html">Managing Impala</a></li><li><a href="../topics/impala_upgrading.html">Upgrading Impala</a></li><li><a href="../topics/impala_processes.html">Starting Impala</a></li><li><a href="../topics/impala_tutorial.html">Tutorials</a></li><li><a href="../topics/impala_admin.html">Administration</a></li><li><a href="../topics/impala_security.html">Impala Security</a></li><li><a href="../topics/impala_langref.html">SQL Reference</a><ul><li><a href="../topics/impala_comments.html">Comments</a></li><li><a href="../topics/impala_datatypes.html">Data Types</a></li><li><a href="../topics/impala_literals.html">Literals</a></li><li><a href="../topics/impala_operators.html">SQL Operators</a></li><li><a href="../topics/impala_schema_objects.html">Schema Objects and Object Names</a></li><li><a href="../topics/impala_transactions.html">Transactions</a></li><li><a href="../topics/impala_langref_sql.html">SQL Statements</a><ul><li><a href="../topics/impala_ddl.html">DDL Statements</a></li><li><a href="../topics/impala_dml.html">DML Statements</a></li><li><a href="../topics/impala_alter_database.html">ALTER DATABASE</a></li><li><a href="../topics/impala_alter_table.html">ALTER TABLE</a></li><li><a href="../topics/impala_alter_view.html">ALTER VIEW</a></li><li><a href="../topics/impala_comment.html">COMMENT</a></li><li><a href="../topics/impala_compute_stats.html">COMPUTE STATS</a></li><li><a href="../topics/impala_create_database.html">CREATE DATABASE</a></li><li><a href="../topics/impala_create_function.html">CREATE FUNCTION</a></li><li><a href="../topics/impala_create_role.html">CREATE ROLE</a></li><li><a href="../topics/impala_create_table.html">CREATE TABLE</a></li><li><a href="../topics/impala_create_view.html">CREATE VIEW</a></li><li><a href="../topics/impala_delete.html">DELETE</a></li><li><a href="../topics/impala_describe.html">DESCRIBE</a></li><li><a href="../topics/impala_drop_database.html">DROP DATABASE</a></li><li><a href="../topics/impala_drop_function.html">DROP FUNCTION</a></li><li><a href="../topics/impala_drop_role.html">DROP ROLE</a></li><li><a href="../topics/impala_drop_stats.html">DROP STATS</a></li><li><a href="../topics/impala_drop_table.html">DROP TABLE</a></li><li><a href="../topics/impala_drop_view.html">DROP VIEW</a></li><li><a href="../topics/impala_explain.html">EXPLAIN</a></li><li><a href="../topics/impala_grant.html">GRANT</a></li><li><a href="../topics/impala_insert.html">INSERT</a></li><li><a href="../topics/impala_invalidate_metadata.html">INVALIDATE METADATA</a></li><li><a href="../topics/impala_load_data.html">LOAD DATA</a></li><li><a href="../topics/impala_refresh.html">REFRESH</a></li><li><a href="../topics/impala_refresh_authorization.html">REFRESH AUTHORIZATION</a></li><li><a href="../topics/impala_refresh_functions.html">REFRESH FUNCTIONS</a></li><li><a href="../topics/impala_revoke.html">REVOKE</a></li><li><a href="../topics/impala_select.html">SELECT</a></li><li><a href="../topics/impala_set.html">SET</a><ul><li><a href="../topics/impala_abort_on_error.html">ABORT ON ERROR</a></li><li><a href="../topics/impala_allow_erasure_coded_files.html">ALLOW ERASURE CODED FILES</a></li><li><a href="../topics/impala_allow_unsupported_formats.html">ALLOW UNSUPPORTED FORMATS</a></li><li><a href="../topics/impala_appx_count_distinct.html">APPX COUNT DISTINCT</a></li><li><a href="../topics/impala_batch_size.html">BATCH SIZE</a></li><li><a href="../topics/impala_broadcast_bytes_limit.html">BROADCAST BYTES LIMIT</a></li><li><a href="../topics/impala_buffer_pool_limit.html">BUFFER POOL LIMIT</a></li><li><a href="../topics/impala_compression_codec.html">COMPRESSION CODEC</a></li><li><a href="../topics/impala_compute_stats_min_sample_size.html">COMPUTE STATS MIN SAMPLE SIZE</a></li><li><a href="../topics/impala_debug_action.html">DEBUG ACTION</a></li><li><a href="../topics/impala_decimal_v2.html">DECIMAL V2</a></li><li><a href="../topics/impala_default_file_format.html">DEFAULT FILE FORMAT</a></li><li><a href="../topics/impala_default_hints_insert_statement.html">DEFAULT HINTS INSERT STATEMENT</a></li><li class="active"><a href="../topics/impala_default_join_distribution_mode.html">DEFAULT JOIN DISTRIBUTION MODE</a></li><li><a href="../topics/impala_default_spillable_buffer_size.html">DEFAULT SPILLABLE BUFFER SIZE</a></li><li><a href="../topics/impala_default_transactional_type.html">DEFAULT TRANSACTIONAL TYPE</a></li><li><a href="../topics/impala_delete_stats_in_truncate.html">DELETE STATS IN TRUNCATE</a></li><li><a href="../topics/impala_disable_codegen.html">DISABLE CODEGEN</a></li><li><a href="../topics/impala_disable_codegen_rows_threshold.html">DISABLE CODEGEN ROWS THRESHOLD</a></li><li><a href="../topics/impala_disable_hbase_num_rows_estimate.html">DISABLE HBASE NUM ROWS ESTIMATE</a></li><li><a href="../topics/impala_disable_row_runtime_filtering.html">DISABLE ROW RUNTIME FILTERING</a></li><li><a href="../topics/impala_disable_streaming_preaggregations.html">DISABLE STREAMING PREAGGREGATIONS</a></li><li><a href="../topics/impala_disable_unsafe_spills.html">DISABLE UNSAFE SPILLS</a></li><li><a href="../topics/impala_enable_expr_rewrites.html">ENABLE EXPR REWRITES</a></li><li><a href="../topics/impala_exec_single_node_rows_threshold.html">EXEC SINGLE NODE ROWS THRESHOLD</a></li><li><a href="../topics/impala_exec_time_limit_s.html">EXEC TIME LIMIT S</a></li><li><a href="../topics/impala_explain_level.html">EXPLAIN LEVEL</a></li><li><a href="../topics/impala_fetch_rows_timeout_ms.html">FETCH ROWS TIMEOUT MS</a></li><li><a href="../topics/impala_join_rows_produced_limit.html">JOIN ROWS PRODUCED LIMIT</a></li><li><a href="../topics/impala_hbase_cache_blocks.html">HBASE CACHE BLOCKS</a></li><li><a href="../topics/impala_hbase_caching.html">HBASE CACHING</a></li><li><a href="../topics/impala_idle_session_timeout.html">IDLE SESSION TIMEOUT</a></li><li><a href="../topics/impala_kudu_read_mode.html">KUDU READ MODE</a></li><li><a href="../topics/impala_live_progress.html">LIVE PROGRESS</a></li><li><a href="../topics/impala_live_summary.html">LIVE SUMMARY</a></li><li><a href="../topics/impala_max_errors.html">MAX ERRORS</a></li><li><a href="../topics/impala_max_mem_estimate_for_admission.html">MAX MEM ESTIMATE FOR ADMISSION</a></li><li><a href="../topics/impala_max_result_spooling_mem.html">MAX RESULT SPOOLING MEM</a></li><li><a href="../topics/impala_max_row_size.html">MAX ROW SIZE</a></li><li><a href="../topics/impala_max_scan_range_length.html">MAX SCAN RANGE LENGTH</a></li><li><a href="../topics/impala_max_spilled_result_spooling_mem.html">MAX SPILLED RESULT SPOOLING MEM</a></li><li><a href="../topics/impala_mem_limit.html">MEM LIMIT</a></li><li><a href="../topics/impala_min_spillable_buffer_size.html">MIN SPILLABLE BUFFER SIZE</a></li><li><a href="../topics/impala_mt_dop.html">MT DOP</a></li><li><a href="../topics/impala_num_nodes.html">NUM NODES</a></li><li><a href="../topics/impala_num_rows_produced_limit.html">NUM ROWS PRODUCED LIMIT</a></li><li><a href="../topics/impala_num_scanner_threads.html">NUM SCANNER THREADS</a></li><li><a href="../topics/impala_optimize_partition_key_scans.html">OPTIMIZE PARTITION KEY SCANS</a></li><li><a href="../topics/impala_parquet_compression_codec.html">PARQUET COMPRESSION CODEC</a></li><li><a href="../topics/impala_parquet_annotate_strings_utf8.html">PARQUET ANNOTATE STRINGS UTF8</a></li><li><a href="../topics/impala_parquet_array_resolution.html">PARQUET ARRAY RESOLUTION</a></li><li><a href="../topics/impala_parquet_dictionary_filtering.html">PARQUET DICTIONARY FILTERING</a></li><li><a href="../topics/impala_parquet_fallback_schema_resolution.html">PARQUET FALLBACK SCHEMA RESOLUTION</a></li><li><a href="../topics/impala_parquet_file_size.html">PARQUET FILE SIZE</a></li><li><a href="../topics/impala_parquet_object_store_split_size.html">PARQUET OBJECT STORE SPLIT SIZE</a></li><li><a href="../topics/impala_parquet_page_row_count_limit.html">PARQUET PAGE ROW COUNT LIMIT</a></li><li><a href="../topics/impala_parquet_read_statistics.html">PARQUET READ STATISTICS</a></li><li><a href="../topics/impala_parquet_read_page_index.html">PARQUET READ PAGE INDEX</a></li><li><a href="../topics/impala_parquet_write_page_index.html">PARQUET WRITE PAGE INDEX</a></li><li><a href="../topics/impala_prefetch_mode.html">PREFETCH MODE</a></li><li><a href="../topics/impala_query_timeout_s.html">QUERY TIMEOUT S</a></li><li><a href="../topics/impala_refresh_updated_hms.html">REFRESH UPDATED HMS PARTITIONS</a></li><li><a href="../topics/impala_replica_preference.html">REPLICA PREFERENCE</a></li><li><a href="../topics/impala_request_pool.html">REQUEST POOL</a></li><li><a href="../topics/impala_resource_trace_ratio.html">RESOURCE TRACE RATIO</a></li><li><a href="../topics/impala_retry_failed_queries.html">RETRY FAILED QUERIES</a></li><li><a href="../topics/impala_runtime_bloom_filter_size.html">RUNTIME BLOOM FILTER SIZE</a></li><li><a href="../topics/impala_runtime_filter_max_size.html">RUNTIME FILTER MAX SIZE</a></li><li><a href="../topics/impala_runtime_filter_min_size.html">RUNTIME FILTER MIN SIZE</a></li><li><a href="../topics/impala_runtime_filter_mode.html">RUNTIME FILTER MODE</a></li><li><a href="../topics/impala_runtime_filter_wait_time_ms.html">RUNTIME FILTER WAIT TIME MS</a></li><li><a href="../topics/impala_s3_skip_insert_staging.html">S3 SKIP INSERT STAGING</a></li><li><a href="../topics/impala_scan_bytes_limit.html">SCAN BYTES LIMIT</a></li><li><a href="../topics/impala_schedule_random_replica.html">SCHEDULE RANDOM REPLICA</a></li><li><a href="../topics/impala_scratch_limit.html">SCRATCH LIMIT</a></li><li><a href="../topics/impala_shuffle_distinct_exprs.html">SHUFFLE DISTINCT EXPRS</a></li><li><a href="../topics/impala_spool_query_results.html">SPOOL QUERY RESULTS</a></li><li><a href="../topics/impala_support_start_over.html">SUPPORT START OVER</a></li><li><a href="../topics/impala_sync_ddl.html">SYNC DDL</a></li><li><a href="../topics/impala_thread_reservation_aggregate_limit.html">THREAD RESERVATION AGGREGATE LIMIT</a></li><li><a href="../topics/impala_thread_reservation_limit.html">THREAD RESERVATION LIMIT</a></li><li><a href="../topics/impala_timezone.html">TIMEZONE</a></li><li><a href="../topics/impala_topn_bytes_limit.html">TOPN BYTES LIMIT</a></li><li><a href="../topics/impala_utf8_mode.html">UTF8 MODE</a></li><li><a href="../topics/impala_expand_complex_types.html">EXPAND COMPLEX TYPES</a></li></ul></li><li><a href="../topics/impala_show.html">SHOW</a></li><li><a href="../topics/impala_shutdown.html">SHUTDOWN</a></li><li><a href="../topics/impala_truncate_table.html">TRUNCATE TABLE</a></li><li><a href="../topics/impala_update.html">UPDATE</a></li><li><a href="../topics/impala_upsert.html">UPSERT</a></li><li><a href="../topics/impala_use.html">USE</a></li><li><a href="../topics/impala_values.html">VALUES</a></li><li><a href="../topics/impala_hints.html">Optimizer Hints</a></li></ul></li><li><a href="../topics/impala_functions.html">Built-In Functions</a></li><li><a href="../topics/impala_udf.html">User-Defined Functions (UDFs)</a></li><li><a href="../topics/impala_langref_unsupported.html">SQL Differences Between Impala and Hive</a></li><li><a href="../topics/impala_porting.html">Porting SQL</a></li><li><a href="../topics/impala_utf_8.html">UTF-8 Support</a></li></ul></li><li><a href="../topics/impala_performance.html">Performance Tuning</a></li><li><a href="../topics/impala_scalability.html">Scalability Considerations</a></li><li><a href="../topics/impala_resource_management.html">Resource Management</a></li><li><a href="../topics/impala_partitioning.html">Partitioning</a></li><li><a href="../topics/impala_file_formats.html">File Formats</a></li><li><a href="../topics/impala_kudu.html">Using Impala to Query Kudu Tables</a></li><li><a href="../topics/impala_hbase.html">HBase Tables</a></li><li><a href="../topics/impala_iceberg.html">Iceberg Tables</a></li><li><a href="../topics/impala_s3.html">S3 Tables</a></li><li><a href="../topics/impala_adls.html">ADLS Tables</a></li><li><a href="../topics/impala_isilon.html">Isilon Storage</a></li><li><a href="../topics/impala_ozone.html">Ozone Storage</a></li><li><a href="../topics/impala_logging.html">Logging</a></li><li><a href="../topics/impala_client.html">Client Access</a></li><li><a href="../topics/impala_fault_tolerance.html">Fault Tolerance</a></li><li><a href="../topics/impala_troubleshooting.html">Troubleshooting Impala</a></li><li><a href="../topics/impala_ports.html">Ports Used by Impala</a></li><li><a href="../topics/impala_reserved_words.html">Impala Reserved Words</a></li><li><a href="../topics/impala_faq.html">Impala Frequently Asked Questions</a></li><li><a href="../topics/impala_release_notes.html">Impala Release Notes</a></li></ul></nav><main role="main"><article role="article" aria-labelledby="ariaid-title1">
<h1 class="title topictitle1" id="ariaid-title1">DEFAULT_JOIN_DISTRIBUTION_MODE Query Option</h1>
<div class="body conbody">
<p class="p">
This option determines the join distribution that Impala uses when any of the tables
involved in a join query is missing statistics.
</p>
<p class="p">
Impala optimizes join queries based on the presence of table statistics,
which are produced by the Impala <code class="ph codeph">COMPUTE STATS</code> statement.
By default, when a table involved in the join query does not have statistics,
Impala uses the <span class="q">"broadcast"</span> technique that transmits the entire contents
of the table to all executor nodes participating in the query. If one table
involved in a join has statistics and the other does not, the table without
statistics is broadcast. If both tables are missing statistics, the table
that is referenced second in the join order is broadcast. This behavior
is appropriate when the table involved is relatively small, but can lead to
excessive network, memory, and CPU overhead if the table being broadcast is
large.
</p>
<p class="p">
Because Impala queries frequently involve very large tables, and suboptimal
joins for such tables could result in spilling or out-of-memory errors,
the setting <code class="ph codeph">DEFAULT_JOIN_DISTRIBUTION_MODE=SHUFFLE</code> lets you
override the default behavior. The shuffle join mechanism divides the corresponding rows
of each table involved in a join query using a hashing algorithm, and transmits
subsets of the rows to other nodes for processing. Typically, this kind of join is
more efficient for joins between large tables of similar size.
</p>
<p class="p">
The setting <code class="ph codeph">DEFAULT_JOIN_DISTRIBUTION_MODE=SHUFFLE</code> is
recommended when setting up and deploying new clusters, because it is less likely
to result in serious consequences such as spilling or out-of-memory errors if
the query plan is based on incomplete information. This setting is not the default,
to avoid changing the performance characteristics of join queries for clusters that
are already tuned for their existing workloads.
</p>
<p class="p">
<strong class="ph b">Type:</strong> integer
</p>
<p class="p">
The allowed values are <code class="ph codeph">BROADCAST</code> (equivalent to 0)
or <code class="ph codeph">SHUFFLE</code> (equivalent to 1).
</p>
<p class="p">
<strong class="ph b">Examples:</strong>
</p>
<p class="p">
The following examples demonstrate appropriate scenarios for each
setting of this query option.
</p>
<pre class="pre codeblock"><code>
-- Create a billion-row table.
create table big_table stored as parquet
as select * from huge_table limit 1e9;
-- For a big table with no statistics, the
-- shuffle join mechanism is appropriate.
set default_join_distribution_mode=shuffle;
...join queries involving the big table...
</code></pre>
<pre class="pre codeblock"><code>
-- Create a hundred-row table.
create table tiny_table stored as parquet
as select * from huge_table limit 100;
-- For a tiny table with no statistics, the
-- broadcast join mechanism is appropriate.
set default_join_distribution_mode=broadcast;
...join queries involving the tiny table...
</code></pre>
<pre class="pre codeblock"><code>
compute stats tiny_table;
compute stats big_table;
-- Once the stats are computed, the query option has
-- no effect on join queries involving these tables.
-- Impala can determine the absolute and relative sizes
-- of each side of the join query by examining the
-- row size, cardinality, and so on of each table.
...join queries involving both of these tables...
</code></pre>
<p class="p">
<strong class="ph b">Related information:</strong>
</p>
<p class="p">
<a class="xref" href="impala_compute_stats.html">COMPUTE STATS Statement</a>,
<a class="xref" href="impala_joins.html">Joins in Impala SELECT Statements</a>,
<a class="xref" href="impala_perf_joins.html">Performance Considerations for Join Queries</a>
</p>
</div>
<nav role="navigation" class="related-links"><div class="familylinks"><div class="parentlink"><strong>Parent topic:</strong> <a class="link" href="../topics/impala_set.html">SET Statement</a></div></div></nav></article></main></body></html>