| |
| |
| |
| |
| <!DOCTYPE html> |
| <html class="no-js"> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>SQL Pipe Syntax - Spark 4.1.0-preview1 Documentation</title> |
| |
| |
| |
| |
| |
| <link rel="stylesheet" href="css/bootstrap.min.css"> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&Courier+Prime:wght@400;700&display=swap" rel="stylesheet"> |
| <link href="css/custom.css" rel="stylesheet"> |
| <script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script> |
| |
| <link rel="stylesheet" href="css/pygments-default.css"> |
| <link rel="stylesheet" href="css/docsearch.min.css" /> |
| <link rel="stylesheet" href="css/docsearch.css"> |
| |
| |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| _paq.push(["disableCookies"]); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '40']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| |
| </head> |
| <body class="global"> |
| <!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html --> |
| <nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar"> |
| <div class="navbar-brand"><a href="index.html"> |
| <img src="https://spark.apache.org/images/spark-logo-rev.svg" width="141" height="72"/></a><span class="version">4.1.0-preview1</span> |
| </div> |
| <button class="navbar-toggler" type="button" data-toggle="collapse" |
| data-target="#navbarCollapse" aria-controls="navbarCollapse" |
| aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| <div class="collapse navbar-collapse" id="navbarCollapse"> |
| <ul class="navbar-nav me-auto"> |
| <li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a> |
| <div class="dropdown-menu" aria-labelledby="navbarQuickStart"> |
| <a class="dropdown-item" href="quick-start.html">Quick Start</a> |
| <a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a> |
| <a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a> |
| <a class="dropdown-item" href="streaming/index.html">Structured Streaming</a> |
| <a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a> |
| <a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a> |
| <a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a> |
| <a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a> |
| <a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a> |
| <a class="dropdown-item" href="declarative-pipelines-programming-guide.html">Declarative Pipelines</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a> |
| <div class="dropdown-menu" aria-labelledby="navbarAPIDocs"> |
| <a class="dropdown-item" href="api/python/index.html">Python</a> |
| <a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a> |
| <a class="dropdown-item" href="api/java/index.html">Java</a> |
| <a class="dropdown-item" href="api/R/index.html">R</a> |
| <a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a> |
| <div class="dropdown-menu" aria-labelledby="navbarDeploying"> |
| <a class="dropdown-item" href="cluster-overview.html">Overview</a> |
| <a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a> |
| <div class="dropdown-divider"></div> |
| <a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a> |
| <a class="dropdown-item" href="running-on-yarn.html">YARN</a> |
| <a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a> |
| <div class="dropdown-menu" aria-labelledby="navbarMore"> |
| <a class="dropdown-item" href="configuration.html">Configuration</a> |
| <a class="dropdown-item" href="monitoring.html">Monitoring</a> |
| <a class="dropdown-item" href="tuning.html">Tuning Guide</a> |
| <a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a> |
| <a class="dropdown-item" href="security.html">Security</a> |
| <a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a> |
| <a class="dropdown-item" href="migration-guide.html">Migration Guide</a> |
| <div class="dropdown-divider"></div> |
| <a class="dropdown-item" href="building-spark.html">Building Spark</a> |
| <a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a> |
| <a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a> |
| </div> |
| </li> |
| |
| <li class="nav-item"> |
| <input type="text" id="docsearch-input" placeholder="Search the docs…"> |
| </li> |
| </ul> |
| <!--<span class="navbar-text navbar-right"><span class="version-text">v4.1.0-preview1</span></span>--> |
| </div> |
| </nav> |
| |
| |
| |
| <div class="container"> |
| |
| |
| <div class="left-menu-wrapper"> |
| <div class="left-menu"> |
| <h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3> |
| |
| <ul> |
| |
| <li> |
| <a href="sql-getting-started.html"> |
| |
| Getting Started |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-data-sources.html"> |
| |
| Data Sources |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-performance-tuning.html"> |
| |
| Performance Tuning |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-distributed-sql-engine.html"> |
| |
| Distributed SQL Engine |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-pyspark-pandas-with-arrow.html"> |
| |
| PySpark Usage Guide for Pandas with Apache Arrow |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-migration-guide.html"> |
| |
| Migration Guide |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-ref.html"> |
| |
| SQL Reference |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-error-conditions.html"> |
| |
| Error Conditions |
| |
| </a> |
| </li> |
| |
| |
| |
| </ul> |
| |
| </div> |
| </div> |
| |
| <input id="nav-trigger" class="nav-trigger" checked type="checkbox"> |
| <label for="nav-trigger"></label> |
| <div class="content-with-sidebar mr-3" id="content"> |
| |
| <h1 class="title">SQL Pipe Syntax</h1> |
| |
| |
| <h3 id="syntax">Syntax</h3> |
| |
| <h4 id="overview">Overview</h4> |
| |
| <p>Apache Spark supports SQL pipe syntax which allows composing queries from combinations of operators.</p> |
| |
| <ul> |
| <li>Any query can have zero or more pipe operators as a suffix, delineated by the pipe character <code class="language-plaintext highlighter-rouge">|></code>.</li> |
| <li>Each pipe operator starts with one or more SQL keywords followed by its own grammar as described |
| in the table below.</li> |
| <li>Most of these operators reuse existing grammar for standard SQL clauses.</li> |
| <li>Operators can apply in any order, any number of times.</li> |
| </ul> |
| |
| <p><code class="language-plaintext highlighter-rouge">FROM <tableName></code> is now a supported standalone query which behaves the same as |
| <code class="language-plaintext highlighter-rouge">TABLE <tableName></code>. This provides a convenient starting place to begin a chained pipe SQL query, |
| although it is possible to add one or more pipe operators to the end of any valid Spark SQL query |
| with the same consistent behavior as written here.</p> |
| |
| <p>Please refer to the table at the end of this document for a full list of all supported operators |
| and their semantics.</p> |
| |
| <h4 id="example">Example</h4> |
| |
| <p>For example, this is query 13 from the TPC-H benchmark:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">SELECT</span> <span class="n">c_count</span><span class="p">,</span> <span class="k">COUNT</span><span class="p">(</span><span class="o">*</span><span class="p">)</span> <span class="k">AS</span> <span class="n">custdist</span> |
| <span class="k">FROM</span> |
| <span class="p">(</span><span class="k">SELECT</span> <span class="n">c_custkey</span><span class="p">,</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">o_orderkey</span><span class="p">)</span> <span class="n">c_count</span> |
| <span class="k">FROM</span> <span class="n">customer</span> |
| <span class="k">LEFT</span> <span class="k">OUTER</span> <span class="k">JOIN</span> <span class="n">orders</span> <span class="k">ON</span> <span class="n">c_custkey</span> <span class="o">=</span> <span class="n">o_custkey</span> |
| <span class="k">AND</span> <span class="n">o_comment</span> <span class="k">NOT</span> <span class="k">LIKE</span> <span class="s1">'%unusual%packages%'</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_custkey</span> |
| <span class="p">)</span> <span class="k">AS</span> <span class="n">c_orders</span> |
| <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_count</span> |
| <span class="k">ORDER</span> <span class="k">BY</span> <span class="n">custdist</span> <span class="k">DESC</span><span class="p">,</span> <span class="n">c_count</span> <span class="k">DESC</span><span class="p">;</span> |
| </code></pre></div></div> |
| |
| <p>To write the same logic using SQL pipe operators, we express it like this:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">FROM</span> <span class="n">customer</span> |
| <span class="o">|></span> <span class="k">LEFT</span> <span class="k">OUTER</span> <span class="k">JOIN</span> <span class="n">orders</span> <span class="k">ON</span> <span class="n">c_custkey</span> <span class="o">=</span> <span class="n">o_custkey</span> |
| <span class="k">AND</span> <span class="n">o_comment</span> <span class="k">NOT</span> <span class="k">LIKE</span> <span class="s1">'%unusual%packages%'</span> |
| <span class="o">|></span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">o_orderkey</span><span class="p">)</span> <span class="n">c_count</span> |
| <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_custkey</span> |
| <span class="o">|></span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="o">*</span><span class="p">)</span> <span class="k">AS</span> <span class="n">custdist</span> |
| <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">c_count</span> |
| <span class="o">|></span> <span class="k">ORDER</span> <span class="k">BY</span> <span class="n">custdist</span> <span class="k">DESC</span><span class="p">,</span> <span class="n">c_count</span> <span class="k">DESC</span><span class="p">;</span> |
| </code></pre></div></div> |
| |
| <h4 id="source-tables">Source Tables</h4> |
| |
| <p>To start a new query using SQL pipe syntax, use the <code class="language-plaintext highlighter-rouge">FROM <tableName></code> or <code class="language-plaintext highlighter-rouge">TABLE <tableName></code> |
| clause, which creates a relation comprising all rows from the source table. Then append one or more |
| pipe operators to the end of this clause to perform further transformations.</p> |
| |
| <h4 id="projections">Projections</h4> |
| |
| <p>SQL pipe syntax supports composable ways to evaluate expressions. A major advantage of these |
| projection features is that they support computing new expressions based on previous ones in an |
| incremental way. No lateral column references are needed here since each operator applies |
| independently on its input table, regardless of the order in which the operators appear. Each of |
| these computed columns then becomes visible to use with the following operator.</p> |
| |
| <p><code class="language-plaintext highlighter-rouge">SELECT</code> produces a new table by evaluating the provided expressions.<br /> |
| It is possible to use <code class="language-plaintext highlighter-rouge">DISTINCT</code> and <code class="language-plaintext highlighter-rouge">*</code> as needed.<br /> |
| This works like the outermost <code class="language-plaintext highlighter-rouge">SELECT</code> in a table subquery in regular Spark SQL.</p> |
| |
| <p><code class="language-plaintext highlighter-rouge">EXTEND</code> adds new columns to the input table by evaluating the provided expressions.<br /> |
| This also preserves table aliases.<br /> |
| This works like <code class="language-plaintext highlighter-rouge">SELECT *, new_column</code> in regular Spark SQL.</p> |
| |
| <p><code class="language-plaintext highlighter-rouge">DROP</code> removes columns from the input table.<br /> |
| This is similar to <code class="language-plaintext highlighter-rouge">SELECT * EXCEPT (column)</code> in regular Spark SQL.</p> |
| |
| <p><code class="language-plaintext highlighter-rouge">SET</code> replaces column values from the input table.<br /> |
| This is similar to <code class="language-plaintext highlighter-rouge">SELECT * REPLACE (expression AS column)</code> in regular Spark SQL.</p> |
| |
| <p><code class="language-plaintext highlighter-rouge">AS</code> forwards the input table and introduces a new alias for each row.</p> |
| |
| <h4 id="aggregations">Aggregations</h4> |
| |
| <p>In general, aggregation takes place differently using SQL pipe syntax as opposed to regular Spark |
| SQL.</p> |
| |
| <p>To perform full-table aggregation, use the <code class="language-plaintext highlighter-rouge">AGGREGATE</code> operator with a list of aggregate |
| expressions to evaluate. This returns one single row in the output table.</p> |
| |
| <p>To perform aggregation with grouping, use the <code class="language-plaintext highlighter-rouge">AGGREGATE</code> operator with a <code class="language-plaintext highlighter-rouge">GROUP BY</code> clause. |
| This returns one row for each unique combination of values of the grouping expressions. The output |
| table contains the evaluated grouping expressions followed by the evaluated aggregate functions. |
| Grouping expressions support assigning aliases for purposes of referring to them in future |
| operators. In this way, it is not necessary to repeat entire expressions between <code class="language-plaintext highlighter-rouge">GROUP BY</code> and |
| <code class="language-plaintext highlighter-rouge">SELECT</code>, since <code class="language-plaintext highlighter-rouge">AGGREGATE</code> is a single operator that performs both.</p> |
| |
| <h4 id="other-transformations">Other Transformations</h4> |
| |
| <p>The remaining operators are used for other transformations, such as filtering, joining, sorting, |
| sampling, and set operations. These operators generally work in the same way as in regular Spark |
| SQL, as described in the table below.</p> |
| |
| <h3 id="independence-and-interoperability">Independence and Interoperability</h3> |
| |
| <p>SQL pipe syntax works in Spark without any backwards-compatibility concerns with existing SQL |
| queries; it is possible to write any query using regular Spark SQL, pipe syntax, or a combination of |
| the two. As a consequence, the following invariants always hold:</p> |
| |
| <ul> |
| <li>Each pipe operator receives an input table and operates the same way on its rows regardless of how |
| it was computed.</li> |
| <li>For any valid chain of N SQL pipe operators, any subset of the first M <= N operators also |
| represents a valid query.<br /> |
| This property can be useful for introspection and debugging, such as by selected a subset of |
| lines and using the “run highlighted text” feature of SQL editors like Jupyter notebooks.</li> |
| <li>It is possible to append pipe operators to any valid query written in regular Spark SQL.<br /> |
| The canonical way of starting pipe syntax queries is with the <code class="language-plaintext highlighter-rouge">FROM <tableName></code> clause.<br /> |
| Note that this is a valid standalone query and may be replaced with any other Spark SQL query |
| without loss of generality.</li> |
| <li>Table subqueries can be written using either regular Spark SQL syntax or pipe syntax.<br /> |
| They may appear inside enclosing queries written in either syntax.</li> |
| <li>Other Spark SQL statements such as views and DDL and DML commands may include queries written |
| using either syntax.</li> |
| </ul> |
| |
| <h3 id="supported-operators">Supported Operators</h3> |
| |
| <table> |
| <thead> |
| <tr> |
| <th>Operator</th> |
| <th>Output rows</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td><a href="#from-or-table">FROM</a> or <a href="#from-or-table">TABLE</a></td> |
| <td>Returns all the output rows from the source table unmodified.</td> |
| </tr> |
| <tr> |
| <td><a href="#select">SELECT</a></td> |
| <td>Evaluates the provided expressions over each of the rows of the input table.</td> |
| </tr> |
| <tr> |
| <td><a href="#extend">EXTEND</a></td> |
| <td>Appends new columns to the input table by evaluating the specified expressions over each of the input rows.</td> |
| </tr> |
| <tr> |
| <td><a href="#set">SET</a></td> |
| <td>Updates columns of the input table by replacing them with the result of evaluating the provided expressions.</td> |
| </tr> |
| <tr> |
| <td><a href="#drop">DROP</a></td> |
| <td>Drops columns of the input table by name.</td> |
| </tr> |
| <tr> |
| <td><a href="#as">AS</a></td> |
| <td>Retains the same rows and column names of the input table but with a new table alias.</td> |
| </tr> |
| <tr> |
| <td><a href="#where">WHERE</a></td> |
| <td>Returns the subset of input rows passing the condition.</td> |
| </tr> |
| <tr> |
| <td><a href="#limit">LIMIT</a></td> |
| <td>Returns the specified number of input rows, preserving ordering (if any).</td> |
| </tr> |
| <tr> |
| <td><a href="#aggregate">AGGREGATE</a></td> |
| <td>Performs aggregation with or without grouping.</td> |
| </tr> |
| <tr> |
| <td><a href="#join">JOIN</a></td> |
| <td>Joins rows from both inputs, returning a filtered cross-product of the input table and the table argument.</td> |
| </tr> |
| <tr> |
| <td><a href="#order-by">ORDER BY</a></td> |
| <td>Returns the input rows after sorting as indicated.</td> |
| </tr> |
| <tr> |
| <td><a href="#union-intersect-except">UNION ALL</a></td> |
| <td>Performs the union or other set operation over the combined rows from the input table plus other table argument(s).</td> |
| </tr> |
| <tr> |
| <td><a href="#tablesample">TABLESAMPLE</a></td> |
| <td>Returns the subset of rows chosen by the provided sampling algorithm.</td> |
| </tr> |
| <tr> |
| <td><a href="#pivot">PIVOT</a></td> |
| <td>Returns a new table with the input rows pivoted to become columns.</td> |
| </tr> |
| <tr> |
| <td><a href="#unpivot">UNPIVOT</a></td> |
| <td>Returns a new table with the input columns pivoted to become rows.</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <p>This table lists each of the supported pipe operators and describes the output rows they produce. |
| Note that each operator accepts an input relation comprising the rows generated by the query |
| preceding the <code class="language-plaintext highlighter-rouge">|></code> symbol.</p> |
| |
| <h4 id="from-or-table">FROM or TABLE</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">FROM</span> <span class="o"><</span><span class="n">tableName</span><span class="o">></span> |
| </code></pre></div></div> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">TABLE</span> <span class="o"><</span><span class="n">tableName</span><span class="o">></span> |
| </code></pre></div></div> |
| |
| <p>Returns all the output rows from the source table unmodified.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">CREATE</span> <span class="k">TABLE</span> <span class="n">t</span> <span class="k">AS</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="k">AS</span> <span class="n">t</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">);</span> |
| <span class="k">TABLE</span> <span class="n">t</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+---+</span> |
| <span class="o">|</span> <span class="n">a</span><span class="o">|</span> <span class="n">b</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+---+</span> |
| <span class="o">|</span> <span class="mi">1</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">3</span><span class="o">|</span> <span class="mi">4</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+---+</span> |
| </code></pre></div></div> |
| |
| <h4 id="select">SELECT</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="k">SELECT</span> <span class="o"><</span><span class="n">expr</span><span class="o">></span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span> |
| </code></pre></div></div> |
| |
| <p>Evaluates the provided expressions over each of the rows of the input table.</p> |
| |
| <p>In general, this operator is not always required with SQL pipe syntax. It is possible to use it at |
| or near the end of a query to evaluate expressions or specify a list of output columns.</p> |
| |
| <p>Since the final query result always comprises the columns returned from the last pipe operator, |
| when this <code class="language-plaintext highlighter-rouge">SELECT</code> operator does not appear, the output includes all columns from the full row. |
| This behavior is similar to <code class="language-plaintext highlighter-rouge">SELECT *</code> in standard SQL syntax.</p> |
| |
| <p>It is possible to use <code class="language-plaintext highlighter-rouge">DISTINCT</code> and <code class="language-plaintext highlighter-rouge">*</code> as needed.<br /> |
| This works like the outermost <code class="language-plaintext highlighter-rouge">SELECT</code> in a table subquery in regular Spark SQL.</p> |
| |
| <p>Window functions are supported in the <code class="language-plaintext highlighter-rouge">SELECT</code> list as well. To use them, the <code class="language-plaintext highlighter-rouge">OVER</code> clause must be |
| provided. You may provide the window specification in the <code class="language-plaintext highlighter-rouge">WINDOW</code> clause.</p> |
| |
| <p>Aggregate functions are not supported in this operator. To perform aggregation, use the <code class="language-plaintext highlighter-rouge">AGGREGATE</code> |
| operator instead.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">CREATE</span> <span class="k">TABLE</span> <span class="n">t</span> <span class="k">AS</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="k">AS</span> <span class="n">t</span><span class="p">(</span><span class="n">col</span><span class="p">);</span> |
| |
| <span class="k">FROM</span> <span class="n">t</span> |
| <span class="o">|></span> <span class="k">SELECT</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span> <span class="k">AS</span> <span class="k">result</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">------+</span> |
| <span class="o">|</span><span class="k">result</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">------+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">------+</span> |
| </code></pre></div></div> |
| |
| <h4 id="extend">EXTEND</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="n">EXTEND</span> <span class="o"><</span><span class="n">expr</span><span class="o">></span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span> |
| </code></pre></div></div> |
| |
| <p>Appends new columns to the input table by evaluating the specified expressions over each of the |
| input rows.</p> |
| |
| <p>After an <code class="language-plaintext highlighter-rouge">EXTEND</code> operation, top-level column names are updated but table aliases still refer to the |
| original row values (such as an inner join between two tables <code class="language-plaintext highlighter-rouge">lhs</code> and <code class="language-plaintext highlighter-rouge">rhs</code> with a subsequent |
| <code class="language-plaintext highlighter-rouge">EXTEND</code> and then <code class="language-plaintext highlighter-rouge">SELECT lhs.col, rhs.col</code>).</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="n">EXTEND</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span> <span class="k">AS</span> <span class="k">result</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+------+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span><span class="k">result</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+------+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">1</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+------+</span> |
| </code></pre></div></div> |
| |
| <h4 id="set">SET</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="k">SET</span> <span class="o"><</span><span class="k">column</span><span class="o">></span> <span class="o">=</span> <span class="o"><</span><span class="n">expression</span><span class="o">></span><span class="p">,</span> <span class="p">...</span> |
| </code></pre></div></div> |
| |
| <p>Updates columns of the input table by replacing them with the result of evaluating the provided |
| expressions. Each such column reference must appear in the input table exactly once.</p> |
| |
| <p>This is similar to <code class="language-plaintext highlighter-rouge">SELECT * EXCEPT (column), <expression> AS column</code> in regular Spark SQL.</p> |
| |
| <p>It is possible to perform multiple assignments in a single <code class="language-plaintext highlighter-rouge">SET</code> clause. Each assignment may refer |
| to the result of previous assignments.</p> |
| |
| <p>After an assignment, top-level column names are updated but table aliases still refer to the |
| original row values (such as an inner join between two tables <code class="language-plaintext highlighter-rouge">lhs</code> and <code class="language-plaintext highlighter-rouge">rhs</code> with a subsequent |
| <code class="language-plaintext highlighter-rouge">SET</code> and then <code class="language-plaintext highlighter-rouge">SELECT lhs.col, rhs.col</code>).</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">SET</span> <span class="n">col</span> <span class="o">=</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| |
| <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">SET</span> <span class="n">col</span> <span class="o">=</span> <span class="n">col</span> <span class="o">*</span> <span class="mi">2</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| </code></pre></div></div> |
| |
| <h4 id="drop">DROP</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="k">DROP</span> <span class="o"><</span><span class="k">column</span><span class="o">></span><span class="p">,</span> <span class="p">...</span> |
| </code></pre></div></div> |
| |
| <p>Drops columns of the input table by name. Each such column reference must appear in the input table |
| exactly once.</p> |
| |
| <p>This is similar to <code class="language-plaintext highlighter-rouge">SELECT * EXCEPT (column)</code> in regular Spark SQL.</p> |
| |
| <p>After a <code class="language-plaintext highlighter-rouge">DROP</code> operation, top-level column names are updated but table aliases still refer to the |
| original row values (such as an inner join between two tables <code class="language-plaintext highlighter-rouge">lhs</code> and <code class="language-plaintext highlighter-rouge">rhs</code> with a subsequent |
| <code class="language-plaintext highlighter-rouge">DROP</code> and then <code class="language-plaintext highlighter-rouge">SELECT lhs.col, rhs.col</code>).</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="n">col2</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">DROP</span> <span class="n">col1</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">----+</span> |
| <span class="o">|</span><span class="n">col2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">----+</span> |
| <span class="o">|</span> <span class="mi">1</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">----+</span> |
| </code></pre></div></div> |
| |
| <h4 id="as">AS</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="k">AS</span> <span class="o"><</span><span class="k">alias</span><span class="o">></span> |
| </code></pre></div></div> |
| |
| <p>Retains the same rows and column names of the input table but with a new table alias.</p> |
| |
| <p>This operator is useful for introducing a new alias for the input table, which can then be referred |
| to in subsequent operators. Any existing alias for the table is replaced by the new alias.</p> |
| |
| <p>It is useful to use this operator after adding new columns with <code class="language-plaintext highlighter-rouge">SELECT</code> or <code class="language-plaintext highlighter-rouge">EXTEND</code> or after |
| performing aggregation with <code class="language-plaintext highlighter-rouge">AGGREGATE</code>. This simplifies the process of referring to the columns |
| from subsequent <code class="language-plaintext highlighter-rouge">JOIN</code> operators and allows for more readable queries.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="n">col2</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">AS</span> <span class="n">new_tab</span> |
| <span class="o">|></span> <span class="k">SELECT</span> <span class="n">col1</span> <span class="o">+</span> <span class="n">col2</span> <span class="k">FROM</span> <span class="n">new_tab</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">-----------+</span> |
| <span class="o">|</span><span class="n">col1</span> <span class="o">+</span> <span class="n">col2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">-----------+</span> |
| <span class="o">|</span> <span class="mi">1</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">-----------+</span> |
| </code></pre></div></div> |
| |
| <h4 id="where">WHERE</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="k">WHERE</span> <span class="o"><</span><span class="n">condition</span><span class="o">></span> |
| </code></pre></div></div> |
| |
| <p>Returns the subset of input rows passing the condition.</p> |
| |
| <p>Since this operator may appear anywhere, no separate <code class="language-plaintext highlighter-rouge">HAVING</code> or <code class="language-plaintext highlighter-rouge">QUALIFY</code> syntax is needed.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">WHERE</span> <span class="n">col</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">1</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| </code></pre></div></div> |
| |
| <h4 id="limit">LIMIT</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="p">[</span><span class="k">LIMIT</span> <span class="o"><</span><span class="n">n</span><span class="o">></span><span class="p">]</span> <span class="p">[</span><span class="k">OFFSET</span> <span class="o"><</span><span class="n">m</span><span class="o">></span><span class="p">]</span> |
| </code></pre></div></div> |
| |
| <p>Returns the specified number of input rows, preserving ordering (if any).</p> |
| |
| <p><code class="language-plaintext highlighter-rouge">LIMIT</code> and <code class="language-plaintext highlighter-rouge">OFFSET</code> are supported together. The <code class="language-plaintext highlighter-rouge">LIMIT</code> clause can also be used without the |
| <code class="language-plaintext highlighter-rouge">OFFSET</code> clause, and the <code class="language-plaintext highlighter-rouge">OFFSET</code> clause can be used without the <code class="language-plaintext highlighter-rouge">LIMIT</code> clause.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">LIMIT</span> <span class="mi">1</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| </code></pre></div></div> |
| |
| <h4 id="aggregate">AGGREGATE</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Full-table aggregation</span> |
| <span class="o">|></span> <span class="k">AGGREGATE</span> <span class="o"><</span><span class="n">agg_expr</span><span class="o">></span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span> |
| |
| <span class="c1">-- Aggregation with grouping</span> |
| <span class="o">|></span> <span class="k">AGGREGATE</span> <span class="p">[</span><span class="o"><</span><span class="n">agg_expr</span><span class="o">></span> <span class="p">[[</span><span class="k">AS</span><span class="p">]</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...]</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="o"><</span><span class="n">grouping_expr</span><span class="o">></span> <span class="p">[</span><span class="k">AS</span> <span class="k">alias</span><span class="p">],</span> <span class="p">...</span> |
| </code></pre></div></div> |
| |
| <p>Performs aggregation across grouped rows or across the entire input table.</p> |
| |
| <p>If no <code class="language-plaintext highlighter-rouge">GROUP BY</code> clause is present, this performs full-table aggregation, returning one result row |
| with a column for each aggregate expression. Othwrise, this performs aggregation with grouping, |
| returning one row per group. Aliases can be assigned directly on grouping expressions.</p> |
| |
| <p>The output column list of this operator includes the grouping columns first (if any), and then the |
| aggregate columns afterward.</p> |
| |
| <p>Each <code class="language-plaintext highlighter-rouge"><agg_expr></code> expression can include standard aggregate function(s) like <code class="language-plaintext highlighter-rouge">COUNT</code>, <code class="language-plaintext highlighter-rouge">SUM</code>, <code class="language-plaintext highlighter-rouge">AVG</code>, |
| <code class="language-plaintext highlighter-rouge">MIN</code>, or any other aggregate function(s) that Spark SQL supports. Additional expressions may appear |
| below or above the aggregate function(s), such as <code class="language-plaintext highlighter-rouge">MIN(FLOOR(col)) + 1</code>. Each <code class="language-plaintext highlighter-rouge"><agg_expr></code> |
| expression must contain at least one aggregate function (or otherwise the query returns an error). |
| Each <code class="language-plaintext highlighter-rouge"><agg_expr></code> expression may include a column alias with <code class="language-plaintext highlighter-rouge">AS <alias></code>, and may also |
| include a <code class="language-plaintext highlighter-rouge">DISTINCT</code> keyword to remove duplicate values before applying the aggregate function (for |
| example, <code class="language-plaintext highlighter-rouge">COUNT(DISTINCT col)</code>).</p> |
| |
| <p>If present, the <code class="language-plaintext highlighter-rouge">GROUP BY</code> clause can include any number of grouping expressions, and each |
| <code class="language-plaintext highlighter-rouge"><agg_expr></code> expression will evaluate over each unique combination of values of the grouping |
| expressions. The output table contains the evaluated grouping expressions followed by the evaluated |
| aggregate functions. The <code class="language-plaintext highlighter-rouge">GROUP BY</code> expressions may include one-based ordinals. Unlike regular SQL |
| in which such ordinals refer to the expressions in the accompanying <code class="language-plaintext highlighter-rouge">SELECT</code> clause, in SQL pipe |
| syntax, they refer to the columns of the relation produced by the preceding operator instead. For |
| example, in <code class="language-plaintext highlighter-rouge">TABLE t |> AGGREGATE COUNT(*) GROUP BY 2</code>, we refer to the second column of the input |
| table <code class="language-plaintext highlighter-rouge">t</code>.</p> |
| |
| <p>There is no need to repeat entire expressions between <code class="language-plaintext highlighter-rouge">GROUP BY</code> and <code class="language-plaintext highlighter-rouge">SELECT</code>, since the <code class="language-plaintext highlighter-rouge">AGGREGATE</code> |
| operator automatically includes the evaluated grouping expressions in its output. By the same token, |
| after an <code class="language-plaintext highlighter-rouge">AGGREGATE</code> operator, it is often unnecessary to issue a following <code class="language-plaintext highlighter-rouge">SELECT</code> operator, since |
| <code class="language-plaintext highlighter-rouge">AGGREGATE</code> returns both the grouping columns and the aggregate columns in a single step.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Full-table aggregation</span> |
| <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="k">AS</span> <span class="k">count</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">-----+</span> |
| <span class="o">|</span><span class="k">count</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">-----+</span> |
| <span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">-----+</span> |
| |
| <span class="c1">-- Aggregation with grouping</span> |
| <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="n">col2</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">AGGREGATE</span> <span class="k">COUNT</span><span class="p">(</span><span class="n">col2</span><span class="p">)</span> <span class="k">AS</span> <span class="k">count</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">col1</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">----+-----+</span> |
| <span class="o">|</span><span class="n">col1</span><span class="o">|</span><span class="k">count</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">----+-----+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">----+-----+</span> |
| </code></pre></div></div> |
| |
| <h4 id="join">JOIN</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="p">[</span><span class="k">LEFT</span> <span class="o">|</span> <span class="k">RIGHT</span> <span class="o">|</span> <span class="k">FULL</span> <span class="o">|</span> <span class="k">CROSS</span> <span class="o">|</span> <span class="n">SEMI</span> <span class="o">|</span> <span class="n">ANTI</span> <span class="o">|</span> <span class="k">NATURAL</span> <span class="o">|</span> <span class="k">LATERAL</span><span class="p">]</span> <span class="k">JOIN</span> <span class="o"><</span><span class="k">table</span><span class="o">></span> <span class="p">[</span><span class="k">ON</span> <span class="o"><</span><span class="n">condition</span><span class="o">></span> <span class="o">|</span> <span class="k">USING</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="p">...)]</span> |
| </code></pre></div></div> |
| |
| <p>Joins rows from both inputs, returning a filtered cross-product of the pipe input table and the |
| table expression following the JOIN keyword. This behaves a similar manner as the <code class="language-plaintext highlighter-rouge">JOIN</code> clause in |
| regular SQL where the pipe operator input table becomes the left side of the join and the table |
| argument becomes the right side of the join.</p> |
| |
| <p>Standard join modifiers like <code class="language-plaintext highlighter-rouge">LEFT</code>, <code class="language-plaintext highlighter-rouge">RIGHT</code>, and <code class="language-plaintext highlighter-rouge">FULL</code> are supported before the <code class="language-plaintext highlighter-rouge">JOIN</code> keyword.</p> |
| |
| <p>The join predicate may need to refer to columns from both inputs to the join. In this case, it may |
| be necessary to use table aliases to differentiate between columns in the event that both inputs |
| have columns with the same names. The <code class="language-plaintext highlighter-rouge">AS</code> operator can be useful here to introduce a new alias for |
| the pipe input table that becomes the left side of the join. Use standard syntax to assign an alias |
| to the table argument that becomes the right side of the join, if needed.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">SELECT</span> <span class="mi">0</span> <span class="k">AS</span> <span class="n">a</span><span class="p">,</span> <span class="mi">1</span> <span class="k">AS</span> <span class="n">b</span> |
| <span class="o">|></span> <span class="k">AS</span> <span class="n">lhs</span> |
| <span class="o">|></span> <span class="k">JOIN</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span> <span class="n">rhs</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span> <span class="k">ON</span> <span class="p">(</span><span class="n">lhs</span><span class="p">.</span><span class="n">a</span> <span class="o">=</span> <span class="n">rhs</span><span class="p">.</span><span class="n">a</span><span class="p">);</span> |
| |
| <span class="o">+</span><span class="c1">---+---+---+---+</span> |
| <span class="o">|</span> <span class="n">a</span><span class="o">|</span> <span class="n">b</span><span class="o">|</span> <span class="k">c</span><span class="o">|</span> <span class="n">d</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+---+---+---+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">1</span><span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">2</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+---+---+---+</span> |
| |
| <span class="k">VALUES</span> <span class="p">(</span><span class="s1">'apples'</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="p">(</span><span class="s1">'bananas'</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="n">t</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">sales</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">AS</span> <span class="n">produce_sales</span> |
| <span class="o">|></span> <span class="k">LEFT</span> <span class="k">JOIN</span> |
| <span class="p">(</span><span class="k">SELECT</span> <span class="nv">"apples"</span> <span class="k">AS</span> <span class="n">item</span><span class="p">,</span> <span class="mi">123</span> <span class="k">AS</span> <span class="n">id</span><span class="p">)</span> <span class="k">AS</span> <span class="n">produce_data</span> |
| <span class="k">USING</span> <span class="p">(</span><span class="n">item</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">SELECT</span> <span class="n">produce_sales</span><span class="p">.</span><span class="n">item</span><span class="p">,</span> <span class="n">sales</span><span class="p">,</span> <span class="n">id</span><span class="p">;</span> |
| |
| <span class="cm">/*---------+-------+------+ |
| | item | sales | id | |
| +---------+-------+------+ |
| | apples | 3 | 123 | |
| | bananas | 4 | NULL | |
| +---------+-------+------*/</span> |
| </code></pre></div></div> |
| |
| <h4 id="order-by">ORDER BY</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="k">ORDER</span> <span class="k">BY</span> <span class="o"><</span><span class="n">expr</span><span class="o">></span> <span class="p">[</span><span class="k">ASC</span> <span class="o">|</span> <span class="k">DESC</span><span class="p">],</span> <span class="p">...</span> |
| </code></pre></div></div> |
| |
| <p>Returns the input rows after sorting as indicated. Standard modifiers are supported including NULLS |
| FIRST/LAST.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">ORDER</span> <span class="k">BY</span> <span class="n">col</span> <span class="k">DESC</span><span class="p">;</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">1</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| </code></pre></div></div> |
| |
| <h4 id="union-intersect-except">UNION, INTERSECT, EXCEPT</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="p">{</span><span class="k">UNION</span> <span class="o">|</span> <span class="k">INTERSECT</span> <span class="o">|</span> <span class="k">EXCEPT</span><span class="p">}</span> <span class="p">{</span><span class="k">ALL</span> <span class="o">|</span> <span class="k">DISTINCT</span><span class="p">}</span> <span class="p">(</span><span class="o"><</span><span class="n">query</span><span class="o">></span><span class="p">)</span> |
| </code></pre></div></div> |
| |
| <p>Performs the union or other set operation over the combined rows from the input table or subquery.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span> |
| <span class="o">|></span> <span class="k">UNION</span> <span class="k">ALL</span> <span class="k">VALUES</span> <span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="k">c</span><span class="p">,</span> <span class="n">d</span><span class="p">);</span> |
| |
| <span class="o">+</span><span class="c1">---+----+</span> |
| <span class="o">|</span> <span class="n">a</span><span class="o">|</span> <span class="n">b</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+----+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> <span class="mi">1</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">2</span><span class="o">|</span> <span class="mi">3</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+----+</span> |
| </code></pre></div></div> |
| |
| <h4 id="tablesample">TABLESAMPLE</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="n">TABLESAMPLE</span> <span class="o"><</span><span class="k">method</span><span class="o">></span><span class="p">(</span><span class="o"><</span><span class="k">size</span><span class="o">></span> <span class="p">{</span><span class="k">ROWS</span> <span class="o">|</span> <span class="n">PERCENT</span><span class="p">})</span> |
| </code></pre></div></div> |
| |
| <p>Returns the subset of rows chosen by the provided sampling algorithm.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="n">TABLESAMPLE</span> <span class="p">(</span><span class="mi">1</span> <span class="k">ROWS</span><span class="p">);</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| |
| <span class="k">VALUES</span> <span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="n">tab</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> |
| <span class="o">|></span> <span class="n">TABLESAMPLE</span> <span class="p">(</span><span class="mi">100</span> <span class="n">PERCENT</span><span class="p">);</span> |
| |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span><span class="n">col</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">|</span> <span class="mi">0</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">---+</span> |
| </code></pre></div></div> |
| |
| <h4 id="pivot">PIVOT</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="n">PIVOT</span> <span class="p">(</span><span class="n">agg_expr</span> <span class="k">FOR</span> <span class="n">col</span> <span class="k">IN</span> <span class="p">(</span><span class="n">val1</span><span class="p">,</span> <span class="p">...))</span> |
| </code></pre></div></div> |
| |
| <p>Returns a new table with the input rows pivoted to become columns.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> |
| <span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">10000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">20000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">5000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">48000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">30000</span><span class="p">)</span> |
| <span class="n">courseSales</span><span class="p">(</span><span class="n">course</span><span class="p">,</span> <span class="nb">year</span><span class="p">,</span> <span class="n">earnings</span><span class="p">)</span> |
| <span class="o">|></span> <span class="n">PIVOT</span> <span class="p">(</span> |
| <span class="k">SUM</span><span class="p">(</span><span class="n">earnings</span><span class="p">)</span> |
| <span class="k">FOR</span> <span class="n">COURSE</span> <span class="k">IN</span> <span class="p">(</span><span class="s1">'dotNET'</span><span class="p">,</span> <span class="s1">'Java'</span><span class="p">)</span> |
| <span class="p">)</span> |
| |
| <span class="o">+</span><span class="c1">----+------+------+</span> |
| <span class="o">|</span><span class="nb">year</span><span class="o">|</span><span class="n">dotNET</span><span class="o">|</span> <span class="n">Java</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">----+------+------+</span> |
| <span class="o">|</span><span class="mi">2012</span><span class="o">|</span> <span class="mi">15000</span><span class="o">|</span> <span class="mi">20000</span><span class="o">|</span> |
| <span class="o">|</span><span class="mi">2013</span><span class="o">|</span> <span class="mi">48000</span><span class="o">|</span> <span class="mi">30000</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">----+------+------+</span> |
| </code></pre></div></div> |
| |
| <h4 id="unpivot">UNPIVOT</h4> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="o">|></span> <span class="n">UNPIVOT</span> <span class="p">(</span><span class="n">value_col</span> <span class="k">FOR</span> <span class="n">key_col</span> <span class="k">IN</span> <span class="p">(</span><span class="n">col1</span><span class="p">,</span> <span class="p">...))</span> |
| </code></pre></div></div> |
| |
| <p>Returns a new table with the input columns pivoted to become rows.</p> |
| |
| <p>For example:</p> |
| |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">VALUES</span> |
| <span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">10000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">20000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2012</span><span class="p">,</span> <span class="mi">5000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"dotNET"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">48000</span><span class="p">),</span> |
| <span class="p">(</span><span class="nv">"Java"</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">30000</span><span class="p">)</span> |
| <span class="n">courseSales</span><span class="p">(</span><span class="n">course</span><span class="p">,</span> <span class="nb">year</span><span class="p">,</span> <span class="n">earnings</span><span class="p">)</span> |
| <span class="o">|></span> <span class="n">UNPIVOT</span> <span class="p">(</span> |
| <span class="n">earningsYear</span> <span class="k">FOR</span> <span class="nv">`year`</span> <span class="k">IN</span> <span class="p">(</span><span class="nv">`2012`</span><span class="p">,</span> <span class="nv">`2013`</span><span class="p">,</span> <span class="nv">`2014`</span><span class="p">)</span> |
| |
| <span class="o">+</span><span class="c1">--------+------+--------+</span> |
| <span class="o">|</span> <span class="n">course</span><span class="o">|</span> <span class="nb">year</span><span class="o">|</span><span class="n">earnings</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">--------+------+--------+</span> |
| <span class="o">|</span> <span class="n">Java</span><span class="o">|</span> <span class="mi">2012</span><span class="o">|</span> <span class="mi">20000</span><span class="o">|</span> |
| <span class="o">|</span> <span class="n">Java</span><span class="o">|</span> <span class="mi">2013</span><span class="o">|</span> <span class="mi">30000</span><span class="o">|</span> |
| <span class="o">|</span> <span class="n">dotNET</span><span class="o">|</span> <span class="mi">2012</span><span class="o">|</span> <span class="mi">15000</span><span class="o">|</span> |
| <span class="o">|</span> <span class="n">dotNET</span><span class="o">|</span> <span class="mi">2013</span><span class="o">|</span> <span class="mi">48000</span><span class="o">|</span> |
| <span class="o">|</span> <span class="n">dotNET</span><span class="o">|</span> <span class="mi">2014</span><span class="o">|</span> <span class="mi">22500</span><span class="o">|</span> |
| <span class="o">+</span><span class="c1">--------+------+--------+</span> |
| </code></pre></div></div> |
| |
| |
| |
| </div> |
| |
| <!-- /container --> |
| </div> |
| |
| <script src="js/vendor/jquery-3.5.1.min.js"></script> |
| <script src="js/vendor/bootstrap.bundle.min.js"></script> |
| |
| <script src="js/vendor/anchor.min.js"></script> |
| <script src="js/main.js"></script> |
| |
| <script type="text/javascript" src="js/vendor/docsearch.min.js"></script> |
| <script type="text/javascript"> |
| // DocSearch is entirely free and automated. DocSearch is built in two parts: |
| // 1. a crawler which we run on our own infrastructure every 24 hours. It follows every link |
| // in your website and extract content from every page it traverses. It then pushes this |
| // content to an Algolia index. |
| // 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index |
| // to your search input and display its results in a dropdown UI. If you want to find more |
| // details on how works DocSearch, check the docs of DocSearch. |
| docsearch({ |
| apiKey: 'd62f962a82bc9abb53471cb7b89da35e', |
| appId: 'RAI69RXRSK', |
| indexName: 'apache_spark', |
| inputSelector: '#docsearch-input', |
| enhancedSearchInput: true, |
| algoliaOptions: { |
| 'facetFilters': ["version:4.1.0-preview1"] |
| }, |
| debug: false // Set debug to true if you want to inspect the dropdown |
| }); |
| |
| </script> |
| |
| <!-- MathJax Section --> |
| <script type="text/x-mathjax-config"> |
| MathJax.Hub.Config({ |
| TeX: { equationNumbers: { autoNumber: "AMS" } } |
| }); |
| </script> |
| <script> |
| // Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS. |
| // We could use "//cdn.mathjax...", but that won't support "file://". |
| (function(d, script) { |
| script = d.createElement('script'); |
| script.type = 'text/javascript'; |
| script.async = true; |
| script.onload = function(){ |
| MathJax.Hub.Config({ |
| tex2jax: { |
| inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ], |
| displayMath: [ ["$$","$$"], ["\\[", "\\]"] ], |
| processEscapes: true, |
| skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] |
| } |
| }); |
| }; |
| script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') + |
| 'cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' + |
| '?config=TeX-AMS-MML_HTMLorMML'; |
| d.getElementsByTagName('head')[0].appendChild(script); |
| }(document)); |
| </script> |
| </body> |
| </html> |