blob: 574603a8f5c5ea4931395e84c69c95510402dcec [file] [log] [blame]
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Hints - Spark 3.5.0 Documentation</title>
<link rel="stylesheet" href="css/bootstrap.min.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Courier+Prime:wght@400;700&amp;display=swap" rel="stylesheet">
<link href="css/custom.css" rel="stylesheet">
<script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script>
<link rel="stylesheet" href="css/pygments-default.css">
<link rel="stylesheet" href="css/docsearch.min.css" />
<link rel="stylesheet" href="css/docsearch.css">
<!-- Matomo -->
<script type="text/javascript">
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!--[if lt IE 7]>
<p class="chromeframe">You are using an outdated browser. <a href="https://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p>
<![endif]-->
<!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar">
<div class="navbar-brand"><a href="index.html">
<img src="img/spark-logo-rev.svg" width="141" height="72" alt="Apache Spark"/></a><span class="version">3.5.0</span>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse"
data-target="#navbarCollapse" aria-controls="navbarCollapse"
aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav me-auto">
<li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a>
<div class="dropdown-menu" aria-labelledby="navbarQuickStart">
<a class="dropdown-item" href="quick-start.html">Quick Start</a>
<a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a>
<a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a>
<a class="dropdown-item" href="structured-streaming-programming-guide.html">Structured Streaming</a>
<a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a>
<a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a>
<a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a>
<a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a>
<a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a>
<div class="dropdown-menu" aria-labelledby="navbarAPIDocs">
<a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a>
<a class="dropdown-item" href="api/java/index.html">Java</a>
<a class="dropdown-item" href="api/python/index.html">Python</a>
<a class="dropdown-item" href="api/R/index.html">R</a>
<a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a>
<div class="dropdown-menu" aria-labelledby="navbarDeploying">
<a class="dropdown-item" href="cluster-overview.html">Overview</a>
<a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a>
<a class="dropdown-item" href="running-on-mesos.html">Mesos</a>
<a class="dropdown-item" href="running-on-yarn.html">YARN</a>
<a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a>
<div class="dropdown-menu" aria-labelledby="navbarMore">
<a class="dropdown-item" href="configuration.html">Configuration</a>
<a class="dropdown-item" href="monitoring.html">Monitoring</a>
<a class="dropdown-item" href="tuning.html">Tuning Guide</a>
<a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a>
<a class="dropdown-item" href="security.html">Security</a>
<a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a>
<a class="dropdown-item" href="migration-guide.html">Migration Guide</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="building-spark.html">Building Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a>
</div>
</li>
<li class="nav-item">
<input type="text" id="docsearch-input" placeholder="Search the docs…" aria-label="Search the docs">
</li>
</ul>
<!--<span class="navbar-text navbar-right"><span class="version-text">v3.5.0</span></span>-->
</div>
</nav>
<div class="container">
<div class="left-menu-wrapper">
<div class="left-menu">
<h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3>
<ul>
<li>
<a href="sql-getting-started.html">
Getting Started
</a>
</li>
<li>
<a href="sql-data-sources.html">
Data Sources
</a>
</li>
<li>
<a href="sql-performance-tuning.html">
Performance Tuning
</a>
</li>
<li>
<a href="sql-distributed-sql-engine.html">
Distributed SQL Engine
</a>
</li>
<li>
<a href="sql-pyspark-pandas-with-arrow.html">
PySpark Usage Guide for Pandas with Apache Arrow
</a>
</li>
<li>
<a href="sql-migration-guide.html">
Migration Guide
</a>
</li>
<li>
<a href="sql-ref.html">
SQL Reference
</a>
</li>
<ul>
<li>
<a href="sql-ref-ansi-compliance.html">
ANSI Compliance
</a>
</li>
<li>
<a href="sql-ref-datatypes.html">
Data Types
</a>
</li>
<li>
<a href="sql-ref-datetime-pattern.html">
Datetime Pattern
</a>
</li>
<li>
<a href="sql-ref-number-pattern.html">
Number Pattern
</a>
</li>
<li>
<a href="sql-ref-functions.html">
Functions
</a>
</li>
<li>
<a href="sql-ref-identifier.html">
Identifiers
</a>
</li>
<li>
<a href="sql-ref-literals.html">
Literals
</a>
</li>
<li>
<a href="sql-ref-null-semantics.html">
Null Semantics
</a>
</li>
<li>
<a href="sql-ref-syntax.html">
SQL Syntax
</a>
</li>
<ul>
<li>
<a href="sql-ref-syntax.html#ddl-statements">
Data Definition Statements
</a>
</li>
<li>
<a href="sql-ref-syntax.html#dml-statements">
Data Manipulation Statements
</a>
</li>
<li>
<a href="sql-ref-syntax.html#data-retrieval-statements">
Data Retrieval(Queries)
</a>
</li>
<li>
<a href="sql-ref-syntax.html#auxiliary-statements">
Auxiliary Statements
</a>
</li>
</ul>
</ul>
<li>
<a href="sql-error-conditions.html">
Error Conditions
</a>
</li>
</ul>
</div>
</div>
<input id="nav-trigger" class="nav-trigger" checked type="checkbox">
<label for="nav-trigger"></label>
<div class="content-with-sidebar mr-3" id="content">
<h1 class="title">Hints</h1>
<h3 id="description">Description</h3>
<p>Hints give users a way to suggest specific approaches for Spark SQL to use when generating its execution plan.</p>
<h3 id="syntax">Syntax</h3>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="cm">/*+ hint [ , ... ] */</span>
</code></pre></div></div>
<h3 id="partitioning-hints">Partitioning Hints</h3>
<p>Partitioning hints allow users to suggest a partitioning strategy that Spark should follow. <code class="language-plaintext highlighter-rouge">COALESCE</code>, <code class="language-plaintext highlighter-rouge">REPARTITION</code>,
and <code class="language-plaintext highlighter-rouge">REPARTITION_BY_RANGE</code> hints are supported and are equivalent to <code class="language-plaintext highlighter-rouge">coalesce</code>, <code class="language-plaintext highlighter-rouge">repartition</code>, and
<code class="language-plaintext highlighter-rouge">repartitionByRange</code> <a href="api/scala/org/apache/spark/sql/Dataset.html">Dataset APIs</a>, respectively. The <code class="language-plaintext highlighter-rouge">REBALANCE</code> can only
be used as a hint. These hints give users a way to tune performance and control the number of output files in Spark SQL.
When multiple partitioning hints are specified, multiple nodes are inserted into the logical plan, but the leftmost hint
is picked by the optimizer.</p>
<h4 id="partitioning-hints-types">Partitioning Hints Types</h4>
<ul>
<li>
<p><strong>COALESCE</strong></p>
<p>The <code class="language-plaintext highlighter-rouge">COALESCE</code> hint can be used to reduce the number of partitions to the specified number of partitions. It takes a partition number as a parameter.</p>
</li>
<li>
<p><strong>REPARTITION</strong></p>
<p>The <code class="language-plaintext highlighter-rouge">REPARTITION</code> hint can be used to repartition to the specified number of partitions using the specified partitioning expressions. It takes a partition number, column names, or both as parameters.</p>
</li>
<li>
<p><strong>REPARTITION_BY_RANGE</strong></p>
<p>The <code class="language-plaintext highlighter-rouge">REPARTITION_BY_RANGE</code> hint can be used to repartition to the specified number of partitions using the specified partitioning expressions. It takes column names and an optional partition number as parameters.</p>
</li>
<li>
<p><strong>REBALANCE</strong></p>
<p>The <code class="language-plaintext highlighter-rouge">REBALANCE</code> hint can be used to rebalance the query result output partitions, so that every partition is of a reasonable size (not too small and not too big). It can take column names as parameters, in which case it tries its best to partition the query result by these columns. This is best-effort: if the data is skewed, Spark will split the skewed partitions so that they are not too big. This hint is useful when you need to write the result of this query to a table and want to avoid files that are too small or too big. This hint is ignored if AQE (Adaptive Query Execution) is not enabled.</p>
</li>
</ul>
<h4 id="examples">Examples</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">SELECT</span> <span class="cm">/*+ COALESCE(3) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REPARTITION(3) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REPARTITION(c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REPARTITION(3, c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REPARTITION_BY_RANGE(c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REPARTITION_BY_RANGE(3, c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REBALANCE */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REBALANCE(3) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REBALANCE(c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ REBALANCE(3, c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="c1">-- multiple partitioning hints</span>
<span class="k">EXPLAIN</span> <span class="n">EXTENDED</span> <span class="k">SELECT</span> <span class="cm">/*+ REPARTITION(100), COALESCE(500), REPARTITION_BY_RANGE(3, c) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t</span><span class="p">;</span>
<span class="o">==</span> <span class="n">Parsed</span> <span class="n">Logical</span> <span class="n">Plan</span> <span class="o">==</span>
<span class="s1">'UnresolvedHint REPARTITION, [100]
+- '</span><span class="n">UnresolvedHint</span> <span class="n">COALESCE</span><span class="p">,</span> <span class="p">[</span><span class="mi">500</span><span class="p">]</span>
<span class="o">+-</span> <span class="s1">'UnresolvedHint REPARTITION_BY_RANGE, [3, '</span><span class="k">c</span><span class="p">]</span>
<span class="o">+-</span> <span class="s1">'Project [*]
+- '</span><span class="n">UnresolvedRelation</span> <span class="p">[</span><span class="n">t</span><span class="p">]</span>
<span class="o">==</span> <span class="n">Analyzed</span> <span class="n">Logical</span> <span class="n">Plan</span> <span class="o">==</span>
<span class="n">name</span><span class="p">:</span> <span class="n">string</span><span class="p">,</span> <span class="k">c</span><span class="p">:</span> <span class="nb">int</span>
<span class="n">Repartition</span> <span class="mi">100</span><span class="p">,</span> <span class="k">true</span>
<span class="o">+-</span> <span class="n">Repartition</span> <span class="mi">500</span><span class="p">,</span> <span class="k">false</span>
<span class="o">+-</span> <span class="n">RepartitionByExpression</span> <span class="p">[</span><span class="k">c</span><span class="o">#</span><span class="mi">30</span> <span class="k">ASC</span> <span class="n">NULLS</span> <span class="k">FIRST</span><span class="p">],</span> <span class="mi">3</span>
<span class="o">+-</span> <span class="n">Project</span> <span class="p">[</span><span class="n">name</span><span class="o">#</span><span class="mi">29</span><span class="p">,</span> <span class="k">c</span><span class="o">#</span><span class="mi">30</span><span class="p">]</span>
<span class="o">+-</span> <span class="n">SubqueryAlias</span> <span class="n">spark_catalog</span><span class="p">.</span><span class="k">default</span><span class="p">.</span><span class="n">t</span>
<span class="o">+-</span> <span class="n">Relation</span><span class="p">[</span><span class="n">name</span><span class="o">#</span><span class="mi">29</span><span class="p">,</span><span class="k">c</span><span class="o">#</span><span class="mi">30</span><span class="p">]</span> <span class="n">parquet</span>
<span class="o">==</span> <span class="n">Optimized</span> <span class="n">Logical</span> <span class="n">Plan</span> <span class="o">==</span>
<span class="n">Repartition</span> <span class="mi">100</span><span class="p">,</span> <span class="k">true</span>
<span class="o">+-</span> <span class="n">Relation</span><span class="p">[</span><span class="n">name</span><span class="o">#</span><span class="mi">29</span><span class="p">,</span><span class="k">c</span><span class="o">#</span><span class="mi">30</span><span class="p">]</span> <span class="n">parquet</span>
<span class="o">==</span> <span class="n">Physical</span> <span class="n">Plan</span> <span class="o">==</span>
<span class="n">Exchange</span> <span class="n">RoundRobinPartitioning</span><span class="p">(</span><span class="mi">100</span><span class="p">),</span> <span class="k">false</span><span class="p">,</span> <span class="p">[</span><span class="n">id</span><span class="o">=#</span><span class="mi">121</span><span class="p">]</span>
<span class="o">+-</span> <span class="o">*</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">ColumnarToRow</span>
<span class="o">+-</span> <span class="n">FileScan</span> <span class="n">parquet</span> <span class="k">default</span><span class="p">.</span><span class="n">t</span><span class="p">[</span><span class="n">name</span><span class="o">#</span><span class="mi">29</span><span class="p">,</span><span class="k">c</span><span class="o">#</span><span class="mi">30</span><span class="p">]</span> <span class="n">Batched</span><span class="p">:</span> <span class="k">true</span><span class="p">,</span> <span class="n">DataFilters</span><span class="p">:</span> <span class="p">[],</span> <span class="n">Format</span><span class="p">:</span> <span class="n">Parquet</span><span class="p">,</span>
<span class="k">Location</span><span class="p">:</span> <span class="n">CatalogFileIndex</span><span class="p">[</span><span class="n">file</span><span class="p">:</span><span class="o">/</span><span class="n">spark</span><span class="o">/</span><span class="n">spark</span><span class="o">-</span><span class="n">warehouse</span><span class="o">/</span><span class="n">t</span><span class="p">],</span> <span class="n">PartitionFilters</span><span class="p">:</span> <span class="p">[],</span>
<span class="n">PushedFilters</span><span class="p">:</span> <span class="p">[],</span> <span class="n">ReadSchema</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">name</span><span class="p">:</span><span class="n">string</span><span class="o">&gt;</span>
</code></pre></div></div>
<h3 id="join-hints">Join Hints</h3>
<p>Join hints allow users to suggest the join strategy that Spark should use. Prior to Spark 3.0, only the <code class="language-plaintext highlighter-rouge">BROADCAST</code> Join Hint was supported. <code class="language-plaintext highlighter-rouge">MERGE</code>, <code class="language-plaintext highlighter-rouge">SHUFFLE_HASH</code> and <code class="language-plaintext highlighter-rouge">SHUFFLE_REPLICATE_NL</code> Join Hints support was added in 3.0. When different join strategy hints are specified on both sides of a join, Spark prioritizes hints in the following order: <code class="language-plaintext highlighter-rouge">BROADCAST</code> over <code class="language-plaintext highlighter-rouge">MERGE</code> over <code class="language-plaintext highlighter-rouge">SHUFFLE_HASH</code> over <code class="language-plaintext highlighter-rouge">SHUFFLE_REPLICATE_NL</code>. When both sides are specified with the <code class="language-plaintext highlighter-rouge">BROADCAST</code> hint or the <code class="language-plaintext highlighter-rouge">SHUFFLE_HASH</code> hint, Spark will pick the build side based on the join type and the sizes of the relations. Since a given strategy may not support all join types, Spark is not guaranteed to use the join strategy suggested by the hint.</p>
<h4 id="join-hints-types">Join Hints Types</h4>
<ul>
<li>
<p><strong>BROADCAST</strong></p>
<p>Suggests that Spark use broadcast join. The join side with the hint will be broadcast regardless of <code class="language-plaintext highlighter-rouge">autoBroadcastJoinThreshold</code>. If both sides of the join have the broadcast hints, the one with the smaller size (based on stats) will be broadcast. The aliases for <code class="language-plaintext highlighter-rouge">BROADCAST</code> are <code class="language-plaintext highlighter-rouge">BROADCASTJOIN</code> and <code class="language-plaintext highlighter-rouge">MAPJOIN</code>.</p>
</li>
<li>
<p><strong>MERGE</strong></p>
<p>Suggests that Spark use shuffle sort merge join. The aliases for <code class="language-plaintext highlighter-rouge">MERGE</code> are <code class="language-plaintext highlighter-rouge">SHUFFLE_MERGE</code> and <code class="language-plaintext highlighter-rouge">MERGEJOIN</code>.</p>
</li>
<li>
<p><strong>SHUFFLE_HASH</strong></p>
<p>Suggests that Spark use shuffle hash join. If both sides have the shuffle hash hints, Spark chooses the smaller side (based on stats) as the build side.</p>
</li>
<li>
<p><strong>SHUFFLE_REPLICATE_NL</strong></p>
<p>Suggests that Spark use shuffle-and-replicate nested loop join.</p>
</li>
</ul>
<h4 id="examples-1">Examples</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Join Hints for broadcast join</span>
<span class="k">SELECT</span> <span class="cm">/*+ BROADCAST(t1) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ BROADCASTJOIN (t1) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">left</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ MAPJOIN(t2) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">right</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="c1">-- Join Hints for shuffle sort merge join</span>
<span class="k">SELECT</span> <span class="cm">/*+ SHUFFLE_MERGE(t1) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ MERGEJOIN(t2) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="k">SELECT</span> <span class="cm">/*+ MERGE(t1) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="c1">-- Join Hints for shuffle hash join</span>
<span class="k">SELECT</span> <span class="cm">/*+ SHUFFLE_HASH(t1) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="c1">-- Join Hints for shuffle-and-replicate nested loop join</span>
<span class="k">SELECT</span> <span class="cm">/*+ SHUFFLE_REPLICATE_NL(t1) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
<span class="c1">-- When different join strategy hints are specified on both sides of a join, Spark</span>
<span class="c1">-- prioritizes the BROADCAST hint over the MERGE hint over the SHUFFLE_HASH hint</span>
<span class="c1">-- over the SHUFFLE_REPLICATE_NL hint.</span>
<span class="c1">-- Spark will issue Warning in the following example</span>
<span class="c1">-- org.apache.spark.sql.catalyst.analysis.HintErrorLogger: Hint (strategy=merge)</span>
<span class="c1">-- is overridden by another hint and will not take effect.</span>
<span class="k">SELECT</span> <span class="cm">/*+ BROADCAST(t1), MERGE(t1, t2) */</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">t1</span> <span class="k">INNER</span> <span class="k">JOIN</span> <span class="n">t2</span> <span class="k">ON</span> <span class="n">t1</span><span class="p">.</span><span class="k">key</span> <span class="o">=</span> <span class="n">t2</span><span class="p">.</span><span class="k">key</span><span class="p">;</span>
</code></pre></div></div>
<h3 id="related-statements">Related Statements</h3>
<ul>
<li><a href="sql-ref-syntax-qry-select-join.html">JOIN</a></li>
<li><a href="sql-ref-syntax-qry-select.html">SELECT</a></li>
</ul>
</div>
<!-- /container -->
</div>
<script src="js/vendor/jquery-3.5.1.min.js"></script>
<script src="js/vendor/bootstrap.bundle.min.js"></script>
<script src="js/vendor/anchor.min.js"></script>
<script src="js/main.js"></script>
<script type="text/javascript" src="js/vendor/docsearch.min.js"></script>
<script type="text/javascript">
// DocSearch is entirely free and automated. DocSearch is built in two parts:
// 1. a crawler which we run on our own infrastructure every 24 hours. It follows every link
// in your website and extract content from every page it traverses. It then pushes this
// content to an Algolia index.
// 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index
// to your search input and display its results in a dropdown UI. If you want to find more
// details on how works DocSearch, check the docs of DocSearch.
docsearch({
apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
appId: 'RAI69RXRSK',
indexName: 'apache_spark',
inputSelector: '#docsearch-input',
enhancedSearchInput: true,
algoliaOptions: {
'facetFilters': ["version:3.5.0"]
},
debug: false // Set debug to true if you want to inspect the dropdown
});
</script>
<!-- MathJax Section -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
TeX: { equationNumbers: { autoNumber: "AMS" } }
});
</script>
<script>
// Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS.
// We could use "//cdn.mathjax...", but that won't support "file://".
(function(d, script) {
script = d.createElement('script');
script.type = 'text/javascript';
script.async = true;
script.onload = function(){
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
processEscapes: true,
skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
}
});
};
script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') +
'cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' +
'?config=TeX-AMS-MML_HTMLorMML';
d.getElementsByTagName('head')[0].appendChild(script);
}(document));
</script>
</body>
</html>