blob: 291ab2f25cb9d695ffc04423284425b17bf3a585 [file] [log] [blame]
<!DOCTYPE html>
<html class="no-js">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SQL Pipe Syntax - Spark 4.1.0-preview1 Documentation</title>
<link rel="stylesheet" href="css/bootstrap.min.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&Courier+Prime:wght@400;700&display=swap" rel="stylesheet">
<link href="css/custom.css" rel="stylesheet">
<script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script>
<link rel="stylesheet" href="css/pygments-default.css">
<link rel="stylesheet" href="css/docsearch.min.css" />
<link rel="stylesheet" href="css/docsearch.css">
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar">
<div class="navbar-brand"><a href="index.html">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" width="141" height="72"/></a><span class="version">4.1.0-preview1</span>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse"
data-target="#navbarCollapse" aria-controls="navbarCollapse"
aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav me-auto">
<li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a>
<div class="dropdown-menu" aria-labelledby="navbarQuickStart">
<a class="dropdown-item" href="quick-start.html">Quick Start</a>
<a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a>
<a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a>
<a class="dropdown-item" href="streaming/index.html">Structured Streaming</a>
<a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a>
<a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a>
<a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a>
<a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a>
<a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a>
<a class="dropdown-item" href="declarative-pipelines-programming-guide.html">Declarative Pipelines</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a>
<div class="dropdown-menu" aria-labelledby="navbarAPIDocs">
<a class="dropdown-item" href="api/python/index.html">Python</a>
<a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a>
<a class="dropdown-item" href="api/java/index.html">Java</a>
<a class="dropdown-item" href="api/R/index.html">R</a>
<a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a>
<div class="dropdown-menu" aria-labelledby="navbarDeploying">
<a class="dropdown-item" href="cluster-overview.html">Overview</a>
<a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a>
<a class="dropdown-item" href="running-on-yarn.html">YARN</a>
<a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a>
<div class="dropdown-menu" aria-labelledby="navbarMore">
<a class="dropdown-item" href="configuration.html">Configuration</a>
<a class="dropdown-item" href="monitoring.html">Monitoring</a>
<a class="dropdown-item" href="tuning.html">Tuning Guide</a>
<a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a>
<a class="dropdown-item" href="security.html">Security</a>
<a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a>
<a class="dropdown-item" href="migration-guide.html">Migration Guide</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="building-spark.html">Building Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a>
</div>
</li>
<li class="nav-item">
<input type="text" id="docsearch-input" placeholder="Search the docs…">
</li>
</ul>
<!--<span class="navbar-text navbar-right"><span class="version-text">v4.1.0-preview1</span></span>-->
</div>
</nav>
<div class="container">
<div class="left-menu-wrapper">
<div class="left-menu">
<h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3>
<ul>
<li>
<a href="sql-getting-started.html">
Getting Started
</a>
</li>
<li>
<a href="sql-data-sources.html">
Data Sources
</a>
</li>
<li>
<a href="sql-performance-tuning.html">
Performance Tuning
</a>
</li>
<li>
<a href="sql-distributed-sql-engine.html">
Distributed SQL Engine
</a>
</li>
<li>
<a href="sql-pyspark-pandas-with-arrow.html">
PySpark Usage Guide for Pandas with Apache Arrow
</a>
</li>
<li>
<a href="sql-migration-guide.html">
Migration Guide
</a>
</li>
<li>
<a href="sql-ref.html">
SQL Reference
</a>
</li>
<li>
<a href="sql-error-conditions.html">
Error Conditions
</a>
</li>
</ul>
</div>
</div>
<input id="nav-trigger" class="nav-trigger" checked type="checkbox">
<label for="nav-trigger"></label>
<div class="content-with-sidebar mr-3" id="content">
<h1 class="title">SQL Pipe Syntax</h1>
<h3 id="syntax">Syntax</h3>
<h4 id="overview">Overview</h4>
<p>Apache Spark supports SQL pipe syntax which allows composing queries from combinations of operators.</p>
<ul>
<li>Any query can have zero or more pipe operators as a suffix, delineated by the pipe character <code class="language-plaintext highlighter-rouge">|&gt;</code>.</li>
<li>Each pipe operator starts with one or more SQL keywords followed by its own grammar as described
in the table below.</li>
<li>Most of these operators reuse existing grammar for standard SQL clauses.</li>
<li>Operators can apply in any order, any number of times.</li>
</ul>
<p><code class="language-plaintext highlighter-rouge">FROM &lt;tableName&gt;</code> is now a supported standalone query which behaves the same as
<code class="language-plaintext highlighter-rouge">TABLE &lt;tableName&gt;</code>. This provides a convenient starting place to begin a chained pipe SQL query,
although it is possible to add one or more pipe operators to the end of any valid Spark SQL query
with the same consistent behavior as written here.</p>
<p>Please refer to the table at the end of this document for a full list of all supported operators
and their semantics.</p>
<h4 id="example">Example</h4>
<p>For example, this is query 13 from the TPC-H benchmark:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">SELECT</span> <span class="n">c_count</span><span class="p">,</span> <span class="k">COUNT</span><span class="p">(</span><span class="o">*</span><span class="p">)</span> <span class="k">AS</span> <span class="n">custdist</span>
<span class="k">FROM</span>
<span class="p">(</span><span class="k">SELECT</span> <span class="n">c_custkey</span><span class="p">,</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">o_orderkey</span><span class="p">)</span> <span class="n">c_count</span>
<span class="k">FROM</span> <span class="n">customer</span>
<span class="k">LEFT</span> <span class="k">OUTER</span> <span class="k">JOIN</span> <span class="n">orders</span> <span class="k">ON</span> <span class="n">c_custkey</span> <span class="o">=</span> <span class="n">o_custkey</span>
<span class="k">AND</span> <span class="n">o_comment</span> <span class="k">NOT</span> <span class="k">LIKE</span> <span class="s1">'%unusual%packages%'</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_custkey</span>
<span class="p">)</span> <span class="k">AS</span> <span class="n">c_orders</span>
<span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_count</span>
<span class="k">ORDER</span> <span class="k">BY</span> <span class="n">custdist</span> <span class="k">DESC</span><span class="p">,</span> <span class="n">c_count</span> <span class="k">DESC</span><span class="p">;</span>
</code></pre></div></div>
<p>To write the same logic using SQL pipe operators, we express it like this:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">FROM</span> <span class="n">customer</span>
<span class="o">|&gt;</span> <span class="k">LEFT</span> <span class="k">OUTER</span> <span class="k">JOIN</span> <span class="n">orders</span> <span class="k">ON</span> <span class="n">c_custkey</span> <span class="o">=</span> <span class="n">o_custkey</span>
<span class="k">AND</span> <span class="n">o_comment</span> <span class="k">NOT</span> <span class="k">LIKE</span> <span class="s1">'%unusual%packages%'</span>
<span class="o">|&gt;</span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">o_orderkey</span><span class="p">)</span> <span class="n">c_count</span>
<span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_custkey</span>
<span class="o">|&gt;</span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="o">*</span><span class="p">)</span> <span class="k">AS</span> <span class="n">custdist</span>
<span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_count</span>
<span class="o">|&gt;</span> <span class="k">ORDER</span> <span class="k">BY</span> <span class="n">custdist</span> <span class="k">DESC</span><span class="p">,</span> <span class="n">c_count</span> <span class="k">DESC</span><span class="p">;</span>
</code></pre></div></div>
<h4 id="source-tables">Source Tables</h4>
<p>To start a new query using SQL pipe syntax, use the <code class="language-plaintext highlighter-rouge">FROM &lt;tableName&gt;</code> or <code class="language-plaintext highlighter-rouge">TABLE &lt;tableName&gt;</code>
clause, which creates a relation comprising all rows from the source table. Then append one or more
pipe operators to the end of this clause to perform further transformations.</p>
<h4 id="projections">Projections</h4>
<p>SQL pipe syntax supports composable ways to evaluate expressions. A major advantage of these
projection features is that they support computing new expressions based on previous ones in an
incremental way. No lateral column references are needed here since each operator applies
independently on its input table, regardless of the order in which the operators appear. Each of
these computed columns then becomes visible to use with the following operator.</p>
<p><code class="language-plaintext highlighter-rouge">SELECT</code> produces a new table by evaluating the provided expressions.<br />
It is possible to use <code class="language-plaintext highlighter-rouge">DISTINCT</code> and <code class="language-plaintext highlighter-rouge">*</code> as needed.<br />
This works like the outermost <code class="language-plaintext highlighter-rouge">SELECT</code> in a table subquery in regular Spark SQL.</p>
<p><code class="language-plaintext highlighter-rouge">EXTEND</code> adds new columns to the input table by evaluating the provided expressions.<br />
This also preserves table aliases.<br />
This works like <code class="language-plaintext highlighter-rouge">SELECT *, new_column</code> in regular Spark SQL.</p>
<p><code class="language-plaintext highlighter-rouge">DROP</code> removes columns from the input table.<br />
This is similar to <code class="language-plaintext highlighter-rouge">SELECT * EXCEPT (column)</code> in regular Spark SQL.</p>
<p><code class="language-plaintext highlighter-rouge">SET</code> replaces column values from the input table.<br />
This is similar to <code class="language-plaintext highlighter-rouge">SELECT * REPLACE (expression AS column)</code> in regular Spark SQL.</p>
<p><code class="language-plaintext highlighter-rouge">AS</code> forwards the input table and introduces a new alias for each row.</p>
<h4 id="aggregations">Aggregations</h4>
<p>In general, aggregation takes place differently using SQL pipe syntax as opposed to regular Spark
SQL.</p>
<p>To perform full-table aggregation, use the <code class="language-plaintext highlighter-rouge">AGGREGATE</code> operator with a list of aggregate
expressions to evaluate. This returns one single row in the output table.</p>
<p>To perform aggregation with grouping, use the <code class="language-plaintext highlighter-rouge">AGGREGATE</code> operator with a <code class="language-plaintext highlighter-rouge">GROUP BY</code> clause.
This returns one row for each unique combination of values of the grouping expressions. The output
table contains the evaluated grouping expressions followed by the evaluated aggregate functions.
Grouping expressions support assigning aliases for purposes of referring to them in future
operators. In this way, it is not necessary to repeat entire expressions between <code class="language-plaintext highlighter-rouge">GROUP BY</code> and
<code class="language-plaintext highlighter-rouge">SELECT</code>, since <code class="language-plaintext highlighter-rouge">AGGREGATE</code> is a single operator that performs both.</p>
<h4 id="other-transformations">Other Transformations</h4>
<p>The remaining operators are used for other transformations, such as filtering, joining, sorting,
sampling, and set operations. These operators generally work in the same way as in regular Spark
SQL, as described in the table below.</p>
<h3 id="independence-and-interoperability">Independence and Interoperability</h3>
<p>SQL pipe syntax works in Spark without any backwards-compatibility concerns with existing SQL
queries; it is possible to write any query using regular Spark SQL, pipe syntax, or a combination of
the two. As a consequence, the following invariants always hold:</p>
<ul>
<li>Each pipe operator receives an input table and operates the same way on its rows regardless of how
it was computed.</li>
<li>For any valid chain of N SQL pipe operators, any subset of the first M &lt;= N operators also
represents a valid query.<br />
This property can be useful for introspection and debugging, such as by selected a subset of
lines and using the &#8220;run highlighted text&#8221; feature of SQL editors like Jupyter notebooks.</li>
<li>It is possible to append pipe operators to any valid query written in regular Spark SQL.<br />
The canonical way of starting pipe syntax queries is with the <code class="language-plaintext highlighter-rouge">FROM &lt;tableName&gt;</code> clause.<br />
Note that this is a valid standalone query and may be replaced with any other Spark SQL query
without loss of generality.</li>
<li>Table subqueries can be written using either regular Spark SQL syntax or pipe syntax.<br />
They may appear inside enclosing queries written in either syntax.</li>
<li>Other Spark SQL statements such as views and DDL and DML commands may include queries written
using either syntax.</li>
</ul>
<h3 id="supported-operators">Supported Operators</h3>
<table>
<thead>
<tr>
<th>Operator</th>
<th>Output rows</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="#from-or-table">FROM</a> or <a href="#from-or-table">TABLE</a></td>
<td>Returns all the output rows from the source table unmodified.</td>
</tr>
<tr>
<td><a href="#select">SELECT</a></td>
<td>Evaluates the provided expressions over each of the rows of the input table.</td>
</tr>
<tr>
<td><a href="#extend">EXTEND</a></td>
<td>Appends new columns to the input table by evaluating the specified expressions over each of the input rows.</td>
</tr>
<tr>
<td><a href="#set">SET</a></td>
<td>Updates columns of the input table by replacing them with the result of evaluating the provided expressions.</td>
</tr>
<tr>
<td><a href="#drop">DROP</a></td>
<td>Drops columns of the input table by name.</td>
</tr>
<tr>
<td><a href="#as">AS</a></td>
<td>Retains the same rows and column names of the input table but with a new table alias.</td>
</tr>
<tr>
<td><a href="#where">WHERE</a></td>
<td>Returns the subset of input rows passing the condition.</td>
</tr>
<tr>
<td><a href="#limit">LIMIT</a></td>
<td>Returns the specified number of input rows, preserving ordering (if any).</td>
</tr>
<tr>
<td><a href="#aggregate">AGGREGATE</a></td>
<td>Performs aggregation with or without grouping.</td>
</tr>
<tr>
<td><a href="#join">JOIN</a></td>
<td>Joins rows from both inputs, returning a filtered cross-product of the input table and the table argument.</td>
</tr>
<tr>
<td><a href="#order-by">ORDER BY</a></td>
<td>Returns the input rows after sorting as indicated.</td>
</tr>
<tr>
<td><a href="#union-intersect-except">UNION ALL</a></td>
<td>Performs the union or other set operation over the combined rows from the input table plus other table argument(s).</td>
</tr>
<tr>
<td><a href="#tablesample">TABLESAMPLE</a></td>
<td>Returns the subset of rows chosen by the provided sampling algorithm.</td>
</tr>
<tr>
<td><a href="#pivot">PIVOT</a></td>
<td>Returns a new table with the input rows pivoted to become columns.</td>
</tr>
<tr>
<td><a href="#unpivot">UNPIVOT</a></td>
<td>Returns a new table with the input columns pivoted to become rows.</td>
</tr>
</tbody>
</table>
<p>This table lists each of the supported pipe operators and describes the output rows they produce.
Note that each operator accepts an input relation comprising the rows generated by the query
preceding the <code class="language-plaintext highlighter-rouge">|&gt;</code> symbol.</p>
<h4 id="from-or-table">FROM or TABLE</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">FROM</span> <span class="o">&lt;</span><span class="n">tableName</span><span class="o">&gt;</span>
</code></pre></div></div>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">TABLE</span> <span class="o">&lt;</span><span class="n">tableName</span><span class="o">&gt;</span>
</code></pre></div></div>
<p>Returns all the output rows from the source table unmodified.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">CREATE</span> <span class="k">TABLE</span> <span class="n">t</span> <span class="k">AS</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="k">AS</span> <span class="n">t</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">);</span>
<span class="k">TABLE</span> <span class="n">t</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+---+</span>
<span class="o">|</span> <span class="n">a</span><span class="o">|</span> <span class="n">b</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+---+</span>
<span class="o">|</span> <span class="mi">1</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">3</span><span class="o">|</span> <span class="mi">4</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+---+</span>
</code></pre></div></div>
<h4 id="select">SELECT</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="k">SELECT</span> <span class="o">&lt;</span><span class="n">expr</span><span class="o">&gt;</span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span>
</code></pre></div></div>
<p>Evaluates the provided expressions over each of the rows of the input table.</p>
<p>In general, this operator is not always required with SQL pipe syntax. It is possible to use it at
or near the end of a query to evaluate expressions or specify a list of output columns.</p>
<p>Since the final query result always comprises the columns returned from the last pipe operator,
when this <code class="language-plaintext highlighter-rouge">SELECT</code> operator does not appear, the output includes all columns from the full row.
This behavior is similar to <code class="language-plaintext highlighter-rouge">SELECT *</code> in standard SQL syntax.</p>
<p>It is possible to use <code class="language-plaintext highlighter-rouge">DISTINCT</code> and <code class="language-plaintext highlighter-rouge">*</code> as needed.<br />
This works like the outermost <code class="language-plaintext highlighter-rouge">SELECT</code> in a table subquery in regular Spark SQL.</p>
<p>Window functions are supported in the <code class="language-plaintext highlighter-rouge">SELECT</code> list as well. To use them, the <code class="language-plaintext highlighter-rouge">OVER</code> clause must be
provided. You may provide the window specification in the <code class="language-plaintext highlighter-rouge">WINDOW</code> clause.</p>
<p>Aggregate functions are not supported in this operator. To perform aggregation, use the <code class="language-plaintext highlighter-rouge">AGGREGATE</code>
operator instead.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">CREATE</span> <span class="k">TABLE</span> <span class="n">t</span> <span class="k">AS</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="k">AS</span> <span class="n">t</span><span class="p">(</span><span class="n">col</span><span class="p">);</span>
<span class="k">FROM</span> <span class="n">t</span>
<span class="o">|&gt;</span> <span class="k">SELECT</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span> <span class="k">AS</span> <span class="k">result</span><span class="p">;</span>
<span class="o">+</span><span class="c1">------+</span>
<span class="o">|</span><span class="k">result</span><span class="o">|</span>
<span class="o">+</span><span class="c1">------+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">------+</span>
</code></pre></div></div>
<h4 id="extend">EXTEND</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="n">EXTEND</span> <span class="o">&lt;</span><span class="n">expr</span><span class="o">&gt;</span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span>
</code></pre></div></div>
<p>Appends new columns to the input table by evaluating the specified expressions over each of the
input rows.</p>
<p>After an <code class="language-plaintext highlighter-rouge">EXTEND</code> operation, top-level column names are updated but table aliases still refer to the
original row values (such as an inner join between two tables <code class="language-plaintext highlighter-rouge">lhs</code> and <code class="language-plaintext highlighter-rouge">rhs</code> with a subsequent
<code class="language-plaintext highlighter-rouge">EXTEND</code> and then <code class="language-plaintext highlighter-rouge">SELECT lhs.col, rhs.col</code>).</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="n">EXTEND</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span> <span class="k">AS</span> <span class="k">result</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+------+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span><span class="k">result</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+------+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">1</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+------+</span>
</code></pre></div></div>
<h4 id="set">SET</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="k">SET</span> <span class="o">&lt;</span><span class="k">column</span><span class="o">&gt;</span> <span class="o">=</span> <span class="o">&lt;</span><span class="n">expression</span><span class="o">&gt;</span><span class="p">,</span> <span class="p">...</span>
</code></pre></div></div>
<p>Updates columns of the input table by replacing them with the result of evaluating the provided
expressions. Each such column reference must appear in the input table exactly once.</p>
<p>This is similar to <code class="language-plaintext highlighter-rouge">SELECT * EXCEPT (column), &lt;expression&gt; AS column</code> in regular Spark SQL.</p>
<p>It is possible to perform multiple assignments in a single <code class="language-plaintext highlighter-rouge">SET</code> clause. Each assignment may refer
to the result of previous assignments.</p>
<p>After an assignment, top-level column names are updated but table aliases still refer to the
original row values (such as an inner join between two tables <code class="language-plaintext highlighter-rouge">lhs</code> and <code class="language-plaintext highlighter-rouge">rhs</code> with a subsequent
<code class="language-plaintext highlighter-rouge">SET</code> and then <code class="language-plaintext highlighter-rouge">SELECT lhs.col, rhs.col</code>).</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">SET</span> <span class="n">col</span> <span class="o">=</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">SET</span> <span class="n">col</span> <span class="o">=</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
</code></pre></div></div>
<h4 id="drop">DROP</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="k">DROP</span> <span class="o">&lt;</span><span class="k">column</span><span class="o">&gt;</span><span class="p">,</span> <span class="p">...</span>
</code></pre></div></div>
<p>Drops columns of the input table by name. Each such column reference must appear in the input table
exactly once.</p>
<p>This is similar to <code class="language-plaintext highlighter-rouge">SELECT * EXCEPT (column)</code> in regular Spark SQL.</p>
<p>After a <code class="language-plaintext highlighter-rouge">DROP</code> operation, top-level column names are updated but table aliases still refer to the
original row values (such as an inner join between two tables <code class="language-plaintext highlighter-rouge">lhs</code> and <code class="language-plaintext highlighter-rouge">rhs</code> with a subsequent
<code class="language-plaintext highlighter-rouge">DROP</code> and then <code class="language-plaintext highlighter-rouge">SELECT lhs.col, rhs.col</code>).</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="n">col2</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">DROP</span> <span class="n">col1</span><span class="p">;</span>
<span class="o">+</span><span class="c1">----+</span>
<span class="o">|</span><span class="n">col2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">----+</span>
<span class="o">|</span> <span class="mi">1</span><span class="o">|</span>
<span class="o">+</span><span class="c1">----+</span>
</code></pre></div></div>
<h4 id="as">AS</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="k">AS</span> <span class="o">&lt;</span><span class="k">alias</span><span class="o">&gt;</span>
</code></pre></div></div>
<p>Retains the same rows and column names of the input table but with a new table alias.</p>
<p>This operator is useful for introducing a new alias for the input table, which can then be referred
to in subsequent operators. Any existing alias for the table is replaced by the new alias.</p>
<p>It is useful to use this operator after adding new columns with <code class="language-plaintext highlighter-rouge">SELECT</code> or <code class="language-plaintext highlighter-rouge">EXTEND</code> or after
performing aggregation with <code class="language-plaintext highlighter-rouge">AGGREGATE</code>. This simplifies the process of referring to the columns
from subsequent <code class="language-plaintext highlighter-rouge">JOIN</code> operators and allows for more readable queries.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="n">col2</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">AS</span> <span class="n">new_tab</span>
<span class="o">|&gt;</span> <span class="k">SELECT</span> <span class="n">col1</span> <span class="o">+</span> <span class="n">col2</span> <span class="k">FROM</span> <span class="n">new_tab</span><span class="p">;</span>
<span class="o">+</span><span class="c1">-----------+</span>
<span class="o">|</span><span class="n">col1</span> <span class="o">+</span> <span class="n">col2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-----------+</span>
<span class="o">|</span> <span class="mi">1</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-----------+</span>
</code></pre></div></div>
<h4 id="where">WHERE</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="k">WHERE</span> <span class="o">&lt;</span><span class="n">condition</span><span class="o">&gt;</span>
</code></pre></div></div>
<p>Returns the subset of input rows passing the condition.</p>
<p>Since this operator may appear anywhere, no separate <code class="language-plaintext highlighter-rouge">HAVING</code> or <code class="language-plaintext highlighter-rouge">QUALIFY</code> syntax is needed.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">WHERE</span> <span class="n">col</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">1</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
</code></pre></div></div>
<h4 id="limit">LIMIT</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="p">[</span><span class="k">LIMIT</span> <span class="o">&lt;</span><span class="n">n</span><span class="o">&gt;</span><span class="p">]</span> <span class="p">[</span><span class="k">OFFSET</span> <span class="o">&lt;</span><span class="n">m</span><span class="o">&gt;</span><span class="p">]</span>
</code></pre></div></div>
<p>Returns the specified number of input rows, preserving ordering (if any).</p>
<p><code class="language-plaintext highlighter-rouge">LIMIT</code> and <code class="language-plaintext highlighter-rouge">OFFSET</code> are supported together. The <code class="language-plaintext highlighter-rouge">LIMIT</code> clause can also be used without the
<code class="language-plaintext highlighter-rouge">OFFSET</code> clause, and the <code class="language-plaintext highlighter-rouge">OFFSET</code> clause can be used without the <code class="language-plaintext highlighter-rouge">LIMIT</code> clause.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">LIMIT</span> <span class="mi">1</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
</code></pre></div></div>
<h4 id="aggregate">AGGREGATE</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Full-table aggregation</span>
<span class="o">|&gt;</span> <span class="k">AGGREGATE</span> <span class="o">&lt;</span><span class="n">agg_expr</span><span class="o">&gt;</span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span>
<span class="c1">-- Aggregation with grouping</span>
<span class="o">|&gt;</span> <span class="k">AGGREGATE</span> <span class="p">[</span><span class="o">&lt;</span><span class="n">agg_expr</span><span class="o">&gt;</span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...]</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="o">&lt;</span><span class="n">grouping_expr</span><span class="o">&gt;</span> <span class="p">[</span><span class="k">AS</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span>
</code></pre></div></div>
<p>Performs aggregation across grouped rows or across the entire input table.</p>
<p>If no <code class="language-plaintext highlighter-rouge">GROUP BY</code> clause is present, this performs full-table aggregation, returning one result row
with a column for each aggregate expression. Othwrise, this performs aggregation with grouping,
returning one row per group. Aliases can be assigned directly on grouping expressions.</p>
<p>The output column list of this operator includes the grouping columns first (if any), and then the
aggregate columns afterward.</p>
<p>Each <code class="language-plaintext highlighter-rouge">&lt;agg_expr&gt;</code> expression can include standard aggregate function(s) like <code class="language-plaintext highlighter-rouge">COUNT</code>, <code class="language-plaintext highlighter-rouge">SUM</code>, <code class="language-plaintext highlighter-rouge">AVG</code>,
<code class="language-plaintext highlighter-rouge">MIN</code>, or any other aggregate function(s) that Spark SQL supports. Additional expressions may appear
below or above the aggregate function(s), such as <code class="language-plaintext highlighter-rouge">MIN(FLOOR(col)) + 1</code>. Each <code class="language-plaintext highlighter-rouge">&lt;agg_expr&gt;</code>
expression must contain at least one aggregate function (or otherwise the query returns an error).
Each <code class="language-plaintext highlighter-rouge">&lt;agg_expr&gt;</code> expression may include a column alias with <code class="language-plaintext highlighter-rouge">AS &lt;alias&gt;</code>, and may also
include a <code class="language-plaintext highlighter-rouge">DISTINCT</code> keyword to remove duplicate values before applying the aggregate function (for
example, <code class="language-plaintext highlighter-rouge">COUNT(DISTINCT col)</code>).</p>
<p>If present, the <code class="language-plaintext highlighter-rouge">GROUP BY</code> clause can include any number of grouping expressions, and each
<code class="language-plaintext highlighter-rouge">&lt;agg_expr&gt;</code> expression will evaluate over each unique combination of values of the grouping
expressions. The output table contains the evaluated grouping expressions followed by the evaluated
aggregate functions. The <code class="language-plaintext highlighter-rouge">GROUP BY</code> expressions may include one-based ordinals. Unlike regular SQL
in which such ordinals refer to the expressions in the accompanying <code class="language-plaintext highlighter-rouge">SELECT</code> clause, in SQL pipe
syntax, they refer to the columns of the relation produced by the preceding operator instead. For
example, in <code class="language-plaintext highlighter-rouge">TABLE t |&gt; AGGREGATE COUNT(*) GROUP BY 2</code>, we refer to the second column of the input
table <code class="language-plaintext highlighter-rouge">t</code>.</p>
<p>There is no need to repeat entire expressions between <code class="language-plaintext highlighter-rouge">GROUP BY</code> and <code class="language-plaintext highlighter-rouge">SELECT</code>, since the <code class="language-plaintext highlighter-rouge">AGGREGATE</code>
operator automatically includes the evaluated grouping expressions in its output. By the same token,
after an <code class="language-plaintext highlighter-rouge">AGGREGATE</code> operator, it is often unnecessary to issue a following <code class="language-plaintext highlighter-rouge">SELECT</code> operator, since
<code class="language-plaintext highlighter-rouge">AGGREGATE</code> returns both the grouping columns and the aggregate columns in a single step.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Full-table aggregation</span>
<span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="k">AS</span> <span class="k">count</span><span class="p">;</span>
<span class="o">+</span><span class="c1">-----+</span>
<span class="o">|</span><span class="k">count</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-----+</span>
<span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-----+</span>
<span class="c1">-- Aggregation with grouping</span>
<span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="n">col2</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">col2</span><span class="p">)</span> <span class="k">AS</span> <span class="k">count</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">col1</span><span class="p">;</span>
<span class="o">+</span><span class="c1">----+-----+</span>
<span class="o">|</span><span class="n">col1</span><span class="o">|</span><span class="k">count</span><span class="o">|</span>
<span class="o">+</span><span class="c1">----+-----+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">----+-----+</span>
</code></pre></div></div>
<h4 id="join">JOIN</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="p">[</span><span class="k">LEFT</span> <span class="o">|</span> <span class="k">RIGHT</span> <span class="o">|</span> <span class="k">FULL</span> <span class="o">|</span> <span class="k">CROSS</span> <span class="o">|</span> <span class="n">SEMI</span> <span class="o">|</span> <span class="n">ANTI</span> <span class="o">|</span> <span class="k">NATURAL</span> <span class="o">|</span> <span class="k">LATERAL</span><span class="p">]</span> <span class="k">JOIN</span> <span class="o">&lt;</span><span class="k">table</span><span class="o">&gt;</span> <span class="p">[</span><span class="k">ON</span> <span class="o">&lt;</span><span class="n">condition</span><span class="o">&gt;</span> <span class="o">|</span> <span class="k">USING</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="p">...)]</span>
</code></pre></div></div>
<p>Joins rows from both inputs, returning a filtered cross-product of the pipe input table and the
table expression following the JOIN keyword. This behaves a similar manner as the <code class="language-plaintext highlighter-rouge">JOIN</code> clause in
regular SQL where the pipe operator input table becomes the left side of the join and the table
argument becomes the right side of the join.</p>
<p>Standard join modifiers like <code class="language-plaintext highlighter-rouge">LEFT</code>, <code class="language-plaintext highlighter-rouge">RIGHT</code>, and <code class="language-plaintext highlighter-rouge">FULL</code> are supported before the <code class="language-plaintext highlighter-rouge">JOIN</code> keyword.</p>
<p>The join predicate may need to refer to columns from both inputs to the join. In this case, it may
be necessary to use table aliases to differentiate between columns in the event that both inputs
have columns with the same names. The <code class="language-plaintext highlighter-rouge">AS</code> operator can be useful here to introduce a new alias for
the pipe input table that becomes the left side of the join. Use standard syntax to assign an alias
to the table argument that becomes the right side of the join, if needed.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">SELECT</span> <span class="mi">0</span> <span class="k">AS</span> <span class="n">a</span><span class="p">,</span> <span class="mi">1</span> <span class="k">AS</span> <span class="n">b</span>
<span class="o">|&gt;</span> <span class="k">AS</span> <span class="n">lhs</span>
<span class="o">|&gt;</span> <span class="k">JOIN</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="n">rhs</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span> <span class="k">ON</span> <span class="p">(</span><span class="n">lhs</span><span class="p">.</span><span class="n">a</span> <span class="o">=</span> <span class="n">rhs</span><span class="p">.</span><span class="n">a</span><span class="p">);</span>
<span class="o">+</span><span class="c1">---+---+---+---+</span>
<span class="o">|</span> <span class="n">a</span><span class="o">|</span> <span class="n">b</span><span class="o">|</span> <span class="k">c</span><span class="o">|</span> <span class="n">d</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+---+---+---+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">1</span><span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+---+---+---+</span>
<span class="k">VALUES</span> <span class="p">(</span><span class="s1">'apples'</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="p">(</span><span class="s1">'bananas'</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="n">t</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">sales</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">AS</span> <span class="n">produce_sales</span>
<span class="o">|&gt;</span> <span class="k">LEFT</span> <span class="k">JOIN</span>
<span class="p">(</span><span class="k">SELECT</span> <span class="nv">"apples"</span> <span class="k">AS</span> <span class="n">item</span><span class="p">,</span> <span class="mi">123</span> <span class="k">AS</span> <span class="n">id</span><span class="p">)</span> <span class="k">AS</span> <span class="n">produce_data</span>
<span class="k">USING</span> <span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">SELECT</span> <span class="n">produce_sales</span><span class="p">.</span><span class="n">item</span><span class="p">,</span> <span class="n">sales</span><span class="p">,</span> <span class="n">id</span><span class="p">;</span>
<span class="cm">/*---------+-------+------+
| item | sales | id |
+---------+-------+------+
| apples | 3 | 123 |
| bananas | 4 | NULL |
+---------+-------+------*/</span>
</code></pre></div></div>
<h4 id="order-by">ORDER BY</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="k">ORDER</span> <span class="k">BY</span> <span class="o">&lt;</span><span class="n">expr</span><span class="o">&gt;</span> <span class="p">[</span><span class="k">ASC</span> <span class="o">|</span> <span class="k">DESC</span><span class="p">],</span> <span class="p">...</span>
</code></pre></div></div>
<p>Returns the input rows after sorting as indicated. Standard modifiers are supported including NULLS
FIRST/LAST.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">ORDER</span> <span class="k">BY</span> <span class="n">col</span> <span class="k">DESC</span><span class="p">;</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">1</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
</code></pre></div></div>
<h4 id="union-intersect-except">UNION, INTERSECT, EXCEPT</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="p">{</span><span class="k">UNION</span> <span class="o">|</span> <span class="k">INTERSECT</span> <span class="o">|</span> <span class="k">EXCEPT</span><span class="p">}</span> <span class="p">{</span><span class="k">ALL</span> <span class="o">|</span> <span class="k">DISTINCT</span><span class="p">}</span> <span class="p">(</span><span class="o">&lt;</span><span class="n">query</span><span class="o">&gt;</span><span class="p">)</span>
</code></pre></div></div>
<p>Performs the union or other set operation over the combined rows from the input table or subquery.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="k">UNION</span> <span class="k">ALL</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="k">c</span><span class="p">,</span> <span class="n">d</span><span class="p">);</span>
<span class="o">+</span><span class="c1">---+----+</span>
<span class="o">|</span> <span class="n">a</span><span class="o">|</span> <span class="n">b</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+----+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">1</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">2</span><span class="o">|</span> <span class="mi">3</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+----+</span>
</code></pre></div></div>
<h4 id="tablesample">TABLESAMPLE</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="n">TABLESAMPLE</span> <span class="o">&lt;</span><span class="k">method</span><span class="o">&gt;</span><span class="p">(</span><span class="o">&lt;</span><span class="k">size</span><span class="o">&gt;</span> <span class="p">{</span><span class="k">ROWS</span> <span class="o">|</span> <span class="n">PERCENT</span><span class="p">})</span>
</code></pre></div></div>
<p>Returns the subset of rows chosen by the provided sampling algorithm.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="n">TABLESAMPLE</span> <span class="p">(</span><span class="mi">1</span> <span class="k">ROWS</span><span class="p">);</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="n">TABLESAMPLE</span> <span class="p">(</span><span class="mi">100</span> <span class="n">PERCENT</span><span class="p">);</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span><span class="n">col</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">|</span> <span class="mi">0</span><span class="o">|</span>
<span class="o">+</span><span class="c1">---+</span>
</code></pre></div></div>
<h4 id="pivot">PIVOT</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="n">PIVOT</span> <span class="p">(</span><span class="n">agg_expr</span> <span class="k">FOR</span> <span class="n">col</span> <span class="k">IN</span> <span class="p">(</span><span class="n">val1</span><span class="p">,</span> <span class="p">...))</span>
</code></pre></div></div>
<p>Returns a new table with the input rows pivoted to become columns.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span>
<span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">10000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">20000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">5000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">48000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">30000</span><span class="p">)</span>
<span class="n">courseSales</span><span class="p">(</span><span class="n">course</span><span class="p">,</span> <span class="nb">year</span><span class="p">,</span> <span class="n">earnings</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="n">PIVOT</span> <span class="p">(</span>
<span class="k">SUM</span><span class="p">(</span><span class="n">earnings</span><span class="p">)</span>
<span class="k">FOR</span> <span class="n">COURSE</span> <span class="k">IN</span> <span class="p">(</span><span class="s1">'dotNET'</span><span class="p">,</span> <span class="s1">'Java'</span><span class="p">)</span>
<span class="p">)</span>
<span class="o">+</span><span class="c1">----+------+------+</span>
<span class="o">|</span><span class="nb">year</span><span class="o">|</span><span class="n">dotNET</span><span class="o">|</span> <span class="n">Java</span><span class="o">|</span>
<span class="o">+</span><span class="c1">----+------+------+</span>
<span class="o">|</span><span class="mi">2012</span><span class="o">|</span> <span class="mi">15000</span><span class="o">|</span> <span class="mi">20000</span><span class="o">|</span>
<span class="o">|</span><span class="mi">2013</span><span class="o">|</span> <span class="mi">48000</span><span class="o">|</span> <span class="mi">30000</span><span class="o">|</span>
<span class="o">+</span><span class="c1">----+------+------+</span>
</code></pre></div></div>
<h4 id="unpivot">UNPIVOT</h4>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|&gt;</span> <span class="n">UNPIVOT</span> <span class="p">(</span><span class="n">value_col</span> <span class="k">FOR</span> <span class="n">key_col</span> <span class="k">IN</span> <span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="p">...))</span>
</code></pre></div></div>
<p>Returns a new table with the input columns pivoted to become rows.</p>
<p>For example:</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span>
<span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">10000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">20000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">5000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">48000</span><span class="p">),</span>
<span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">30000</span><span class="p">)</span>
<span class="n">courseSales</span><span class="p">(</span><span class="n">course</span><span class="p">,</span> <span class="nb">year</span><span class="p">,</span> <span class="n">earnings</span><span class="p">)</span>
<span class="o">|&gt;</span> <span class="n">UNPIVOT</span> <span class="p">(</span>
<span class="n">earningsYear</span> <span class="k">FOR</span> <span class="nv">`year`</span> <span class="k">IN</span> <span class="p">(</span><span class="nv">`2012`</span><span class="p">,</span> <span class="nv">`2013`</span><span class="p">,</span> <span class="nv">`2014`</span><span class="p">)</span>
<span class="o">+</span><span class="c1">--------+------+--------+</span>
<span class="o">|</span> <span class="n">course</span><span class="o">|</span> <span class="nb">year</span><span class="o">|</span><span class="n">earnings</span><span class="o">|</span>
<span class="o">+</span><span class="c1">--------+------+--------+</span>
<span class="o">|</span> <span class="n">Java</span><span class="o">|</span> <span class="mi">2012</span><span class="o">|</span> <span class="mi">20000</span><span class="o">|</span>
<span class="o">|</span> <span class="n">Java</span><span class="o">|</span> <span class="mi">2013</span><span class="o">|</span> <span class="mi">30000</span><span class="o">|</span>
<span class="o">|</span> <span class="n">dotNET</span><span class="o">|</span> <span class="mi">2012</span><span class="o">|</span> <span class="mi">15000</span><span class="o">|</span>
<span class="o">|</span> <span class="n">dotNET</span><span class="o">|</span> <span class="mi">2013</span><span class="o">|</span> <span class="mi">48000</span><span class="o">|</span>
<span class="o">|</span> <span class="n">dotNET</span><span class="o">|</span> <span class="mi">2014</span><span class="o">|</span> <span class="mi">22500</span><span class="o">|</span>
<span class="o">+</span><span class="c1">--------+------+--------+</span>
</code></pre></div></div>
</div>
<!-- /container -->
</div>
<script src="js/vendor/jquery-3.5.1.min.js"></script>
<script src="js/vendor/bootstrap.bundle.min.js"></script>
<script src="js/vendor/anchor.min.js"></script>
<script src="js/main.js"></script>
<script type="text/javascript" src="js/vendor/docsearch.min.js"></script>
<script type="text/javascript">
// DocSearch is entirely free and automated. DocSearch is built in two parts:
// 1. a crawler which we run on our own infrastructure every 24 hours. It follows every link
// in your website and extract content from every page it traverses. It then pushes this
// content to an Algolia index.
// 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index
// to your search input and display its results in a dropdown UI. If you want to find more
// details on how works DocSearch, check the docs of DocSearch.
docsearch({
apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
appId: 'RAI69RXRSK',
indexName: 'apache_spark',
inputSelector: '#docsearch-input',
enhancedSearchInput: true,
algoliaOptions: {
'facetFilters': ["version:4.1.0-preview1"]
},
debug: false // Set debug to true if you want to inspect the dropdown
});
</script>
<!-- MathJax Section -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
TeX: { equationNumbers: { autoNumber: "AMS" } }
});
</script>
<script>
// Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS.
// We could use "//cdn.mathjax...", but that won't support "file://".
(function(d, script) {
script = d.createElement('script');
script.type = 'text/javascript';
script.async = true;
script.onload = function(){
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
processEscapes: true,
skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
}
});
};
script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') +
'cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' +
'?config=TeX-AMS-MML_HTMLorMML';
d.getElementsByTagName('head')[0].appendChild(script);
}(document));
</script>
</body>
</html>