blob: aee3edc13323aa5afdd067bae9ee5dafe2f2bbd3 [file] [log] [blame]
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Spark SQL CLI - Spark 3.5.3 Documentation</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&Courier+Prime:wght@400;700&display=swap" rel="stylesheet">
<link href="css/custom.css" rel="stylesheet">
<script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script>
<link rel="stylesheet" href="css/pygments-default.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" />
<link rel="stylesheet" href="css/docsearch.css">
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!--[if lt IE 7]>
<p class="chromeframe">You are using an outdated browser. <a href="https://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p>
<![endif]-->
<!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar">
<div class="navbar-brand"><a href="index.html">
<img src="img/spark-logo-rev.svg" width="141" height="72"/></a><span class="version">3.5.3</span>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse"
data-target="#navbarCollapse" aria-controls="navbarCollapse"
aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav me-auto">
<li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a>
<div class="dropdown-menu" aria-labelledby="navbarQuickStart">
<a class="dropdown-item" href="quick-start.html">Quick Start</a>
<a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a>
<a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a>
<a class="dropdown-item" href="structured-streaming-programming-guide.html">Structured Streaming</a>
<a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a>
<a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a>
<a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a>
<a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a>
<a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a>
<div class="dropdown-menu" aria-labelledby="navbarAPIDocs">
<a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a>
<a class="dropdown-item" href="api/java/index.html">Java</a>
<a class="dropdown-item" href="api/python/index.html">Python</a>
<a class="dropdown-item" href="api/R/index.html">R</a>
<a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a>
<div class="dropdown-menu" aria-labelledby="navbarDeploying">
<a class="dropdown-item" href="cluster-overview.html">Overview</a>
<a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a>
<a class="dropdown-item" href="running-on-mesos.html">Mesos</a>
<a class="dropdown-item" href="running-on-yarn.html">YARN</a>
<a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a>
<div class="dropdown-menu" aria-labelledby="navbarMore">
<a class="dropdown-item" href="configuration.html">Configuration</a>
<a class="dropdown-item" href="monitoring.html">Monitoring</a>
<a class="dropdown-item" href="tuning.html">Tuning Guide</a>
<a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a>
<a class="dropdown-item" href="security.html">Security</a>
<a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a>
<a class="dropdown-item" href="migration-guide.html">Migration Guide</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="building-spark.html">Building Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a>
</div>
</li>
<li class="nav-item">
<input type="text" id="docsearch-input" placeholder="Search the docs…">
</li>
</ul>
<!--<span class="navbar-text navbar-right"><span class="version-text">v3.5.3</span></span>-->
</div>
</nav>
<div class="container">
<div class="left-menu-wrapper">
<div class="left-menu">
<h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3>
<ul>
<li>
<a href="sql-getting-started.html">
Getting Started
</a>
</li>
<li>
<a href="sql-data-sources.html">
Data Sources
</a>
</li>
<li>
<a href="sql-performance-tuning.html">
Performance Tuning
</a>
</li>
<li>
<a href="sql-distributed-sql-engine.html">
Distributed SQL Engine
</a>
</li>
<ul>
<li>
<a href="sql-distributed-sql-engine.html#running-the-thrift-jdbcodbc-server">
Running the Thrift JDBC/ODBC server
</a>
</li>
<li>
<a href="sql-distributed-sql-engine.html#running-the-spark-sql-cli">
Running the Spark SQL CLI
</a>
</li>
</ul>
<li>
<a href="sql-pyspark-pandas-with-arrow.html">
PySpark Usage Guide for Pandas with Apache Arrow
</a>
</li>
<li>
<a href="sql-migration-guide.html">
Migration Guide
</a>
</li>
<li>
<a href="sql-ref.html">
SQL Reference
</a>
</li>
<li>
<a href="sql-error-conditions.html">
Error Conditions
</a>
</li>
</ul>
</div>
</div>
<input id="nav-trigger" class="nav-trigger" checked type="checkbox">
<label for="nav-trigger"></label>
<div class="content-with-sidebar mr-3" id="content">
<h1 class="title">Spark SQL CLI</h1>
<ul id="markdown-toc">
<li><a href="#spark-sql-command-line-options" id="markdown-toc-spark-sql-command-line-options">Spark SQL Command Line Options</a></li>
<li><a href="#the-hiverc-file" id="markdown-toc-the-hiverc-file">The hiverc File</a></li>
<li><a href="#path-interpretation" id="markdown-toc-path-interpretation">Path interpretation</a></li>
<li><a href="#supported-comment-types" id="markdown-toc-supported-comment-types">Supported comment types</a></li>
<li><a href="#spark-sql-cli-interactive-shell-commands" id="markdown-toc-spark-sql-cli-interactive-shell-commands">Spark SQL CLI Interactive Shell Commands</a></li>
<li><a href="#examples" id="markdown-toc-examples">Examples</a></li>
</ul>
<p>The Spark SQL CLI is a convenient interactive command tool to run the Hive metastore service and execute SQL
queries input from the command line. Note that the Spark SQL CLI cannot talk to the Thrift JDBC server.</p>
<p>To start the Spark SQL CLI, run the following in the Spark directory:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql
</code></pre></div></div>
<p>Configuration of Hive is done by placing your <code class="language-plaintext highlighter-rouge">hive-site.xml</code>, <code class="language-plaintext highlighter-rouge">core-site.xml</code> and <code class="language-plaintext highlighter-rouge">hdfs-site.xml</code> files in <code class="language-plaintext highlighter-rouge">conf/</code>.</p>
<h2 id="spark-sql-command-line-options">Spark SQL Command Line Options</h2>
<p>You may run <code class="language-plaintext highlighter-rouge">./bin/spark-sql --help</code> for a complete list of all available options.</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>CLI options:
-d,--define &lt;key=value&gt; Variable substitution to apply to Hive
commands. e.g. -d A=B or --define A=B
--database &lt;databasename&gt; Specify the database to use
-e &lt;quoted-query-string&gt; SQL from command line
-f &lt;filename&gt; SQL from files
-H,--help Print help information
--hiveconf &lt;property=value&gt; Use value for given property
--hivevar &lt;key=value&gt; Variable substitution to apply to Hive
commands. e.g. --hivevar A=B
-i &lt;filename&gt; Initialization SQL file
-S,--silent Silent mode in interactive shell
-v,--verbose Verbose mode (echo executed SQL to the
console)
</code></pre></div></div>
<h2 id="the-hiverc-file">The hiverc File</h2>
<p>When invoked without the <code class="language-plaintext highlighter-rouge">-i</code>, the Spark SQL CLI will attempt to load <code class="language-plaintext highlighter-rouge">$HIVE_HOME/bin/.hiverc</code> and <code class="language-plaintext highlighter-rouge">$HOME/.hiverc</code> as initialization files.</p>
<h2 id="path-interpretation">Path interpretation</h2>
<p>Spark SQL CLI supports running SQL from initialization script file(<code class="language-plaintext highlighter-rouge">-i</code>) or normal SQL file(<code class="language-plaintext highlighter-rouge">-f</code>), If path url don&#8217;t have a scheme component, the path will be handled as local file.
For example: <code class="language-plaintext highlighter-rouge">/path/to/spark-sql-cli.sql</code> equals to <code class="language-plaintext highlighter-rouge">file:///path/to/spark-sql-cli.sql</code>. User also can use Hadoop supported filesystems such as <code class="language-plaintext highlighter-rouge">s3://&lt;mys3bucket&gt;/path/to/spark-sql-cli.sql</code> or <code class="language-plaintext highlighter-rouge">hdfs://&lt;namenode&gt;:&lt;port&gt;/path/to/spark-sql-cli.sql</code>.</p>
<h2 id="supported-comment-types">Supported comment types</h2>
<table>
<thead><tr><th>Comment</th><th>Example</th></tr></thead>
<tr>
<td>simple comment</td>
<td>
<code>
-- This is a simple comment.
<br />
SELECT 1;
</code>
</td>
</tr>
<tr>
<td>bracketed comment</td>
<td>
<code>
/* This is a bracketed comment. */
<br />
SELECT 1;
</code>
</td>
</tr>
<tr>
<td>nested bracketed comment</td>
<td>
<code>
/* This is a /* nested bracketed comment*/ .*/
<br />
SELECT 1;
</code>
</td>
</tr>
</table>
<h2 id="spark-sql-cli-interactive-shell-commands">Spark SQL CLI Interactive Shell Commands</h2>
<p>When <code class="language-plaintext highlighter-rouge">./bin/spark-sql</code> is run without either the <code class="language-plaintext highlighter-rouge">-e</code> or <code class="language-plaintext highlighter-rouge">-f</code> option, it enters interactive shell mode.
Use <code class="language-plaintext highlighter-rouge">;</code> (semicolon) to terminate commands. Notice:</p>
<ol>
<li>The CLI use <code class="language-plaintext highlighter-rouge">;</code> to terminate commands only when it&#8217;s at the end of line, and it&#8217;s not escaped by <code class="language-plaintext highlighter-rouge">\\;</code>.</li>
<li><code class="language-plaintext highlighter-rouge">;</code> is the only way to terminate commands. If the user types <code class="language-plaintext highlighter-rouge">SELECT 1</code> and presses enter, the console will just wait for input.</li>
<li>If the user types multiple commands in one line like <code class="language-plaintext highlighter-rouge">SELECT 1; SELECT 2;</code>, the commands <code class="language-plaintext highlighter-rouge">SELECT 1</code> and <code class="language-plaintext highlighter-rouge">SELECT 2</code> will be executed separately.</li>
<li>If <code class="language-plaintext highlighter-rouge">;</code> appears within a SQL statement (not the end of the line), then it has no special meanings:
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- This is a ; comment</span>
<span class="k">SELECT</span> <span class="s1">';'</span> <span class="k">as</span> <span class="n">a</span><span class="p">;</span>
</code></pre></div> </div>
<p>This is just a comment line followed by a SQL query which returns a string literal.</p>
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="cm">/* This is a comment contains ;
*/</span> <span class="k">SELECT</span> <span class="mi">1</span><span class="p">;</span>
</code></pre></div> </div>
<p>However, if &#8216;;&#8217; is the end of the line, it terminates the SQL statement. The example above will be terminated into <code class="language-plaintext highlighter-rouge">/* This is a comment contains </code> and <code class="language-plaintext highlighter-rouge">*/ SELECT 1</code>, Spark will submit these two commands separated and throw parser error (<code class="language-plaintext highlighter-rouge">unclosed bracketed comment</code> and <code class="language-plaintext highlighter-rouge">Syntax error at or near '*/'</code>).</p>
</li>
</ol>
<table>
<thead><tr><th>Command</th><th>Description</th></tr></thead>
<tr>
<td><code>quit</code> or <code>exit</code></td>
<td>Exits the interactive shell.</td>
</tr>
<tr>
<td><code>!&lt;command&gt;</code></td>
<td>Executes a shell command from the Spark SQL CLI shell.</td>
</tr>
<tr>
<td><code>dfs &lt;HDFS dfs command&gt;</code></td>
<td>Executes a HDFS <a href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSCommands.html#dfs">dfs command</a> from the Spark SQL CLI shell.</td>
</tr>
<tr>
<td><code>&lt;query string&gt;</code></td>
<td>Executes a Spark SQL query and prints results to standard output.</td>
</tr>
<tr>
<td><code>source &lt;filepath&gt;</code></td>
<td>Executes a script file inside the CLI.</td>
</tr>
</table>
<h2 id="examples">Examples</h2>
<p>Example of running a query from the command line:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -e 'SELECT COL FROM TBL'
</code></pre></div></div>
<p>Example of setting Hive configuration variables:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -e 'SELECT COL FROM TBL' --hiveconf hive.exec.scratchdir=/home/my/hive_scratch
</code></pre></div></div>
<p>Example of setting Hive configuration variables and using it in the SQL query:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -e 'SELECT ${hiveconf:aaa}' --hiveconf aaa=bbb --hiveconf hive.exec.scratchdir=/home/my/hive_scratch
spark-sql&gt; SELECT ${aaa};
bbb
</code></pre></div></div>
<p>Example of setting Hive variables substitution:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql --hivevar aaa=bbb --define ccc=ddd
spark-sql&gt; SELECT ${aaa}, ${ccc};
bbb ddd
</code></pre></div></div>
<p>Example of dumping data out from a query into a file using silent mode:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -S -e 'SELECT COL FROM TBL' &gt; result.txt
</code></pre></div></div>
<p>Example of running a script non-interactively:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -f /path/to/spark-sql-script.sql
</code></pre></div></div>
<p>Example of running an initialization script before entering interactive mode:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -i /path/to/spark-sql-init.sql
</code></pre></div></div>
<p>Example of entering interactive mode:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql
spark-sql&gt; SELECT 1;
1
spark-sql&gt; -- This is a simple comment.
spark-sql&gt; SELECT 1;
1
</code></pre></div></div>
<p>Example of entering interactive mode with escape <code class="language-plaintext highlighter-rouge">;</code> in comment:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql
spark-sql&gt;/* This is a comment contains \\;
&gt; It won't be terminated by \\; */
&gt; SELECT 1;
1
</code></pre></div></div>
</div>
<!-- /container -->
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<script src="https://code.jquery.com/jquery.js"></script>
<script src="js/vendor/anchor.min.js"></script>
<script src="js/main.js"></script>
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script>
<script type="text/javascript">
// DocSearch is entirely free and automated. DocSearch is built in two parts:
// 1. a crawler which we run on our own infrastructure every 24 hours. It follows every link
// in your website and extract content from every page it traverses. It then pushes this
// content to an Algolia index.
// 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index
// to your search input and display its results in a dropdown UI. If you want to find more
// details on how works DocSearch, check the docs of DocSearch.
docsearch({
apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
appId: 'RAI69RXRSK',
indexName: 'apache_spark',
inputSelector: '#docsearch-input',
enhancedSearchInput: true,
algoliaOptions: {
'facetFilters': ["version:3.5.3"]
},
debug: false // Set debug to true if you want to inspect the dropdown
});
</script>
<!-- MathJax Section -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
TeX: { equationNumbers: { autoNumber: "AMS" } }
});
</script>
<script>
// Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS.
// We could use "//cdn.mathjax...", but that won't support "file://".
(function(d, script) {
script = d.createElement('script');
script.type = 'text/javascript';
script.async = true;
script.onload = function(){
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
processEscapes: true,
skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
}
});
};
script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') +
'cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' +
'?config=TeX-AMS-MML_HTMLorMML';
d.getElementsByTagName('head')[0].appendChild(script);
}(document));
</script>
</body>
</html>