| |
| <!DOCTYPE html> |
| <!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]--> |
| <!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]--> |
| <!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>Spark SQL CLI - Spark 3.5.0 Documentation</title> |
| |
| |
| |
| |
| |
| <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" |
| integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous"> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&Courier+Prime:wght@400;700&display=swap" rel="stylesheet"> |
| <link href="css/custom.css" rel="stylesheet"> |
| <script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script> |
| |
| <link rel="stylesheet" href="css/pygments-default.css"> |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" /> |
| <link rel="stylesheet" href="css/docsearch.css"> |
| |
| </head> |
| <body class="global"> |
| <!--[if lt IE 7]> |
| <p class="chromeframe">You are using an outdated browser. <a href="https://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p> |
| <![endif]--> |
| |
| <!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html --> |
| |
| <nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar"> |
| <div class="navbar-brand"><a href="index.html"> |
| <img src="img/spark-logo-rev.svg" width="141" height="72"/></a><span class="version">3.5.0</span> |
| </div> |
| <button class="navbar-toggler" type="button" data-toggle="collapse" |
| data-target="#navbarCollapse" aria-controls="navbarCollapse" |
| aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| <div class="collapse navbar-collapse" id="navbarCollapse"> |
| <ul class="navbar-nav me-auto"> |
| <li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a> |
| <div class="dropdown-menu" aria-labelledby="navbarQuickStart"> |
| <a class="dropdown-item" href="quick-start.html">Quick Start</a> |
| <a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a> |
| <a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a> |
| <a class="dropdown-item" href="structured-streaming-programming-guide.html">Structured Streaming</a> |
| <a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a> |
| <a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a> |
| <a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a> |
| <a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a> |
| <a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a> |
| <div class="dropdown-menu" aria-labelledby="navbarAPIDocs"> |
| <a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a> |
| <a class="dropdown-item" href="api/java/index.html">Java</a> |
| <a class="dropdown-item" href="api/python/index.html">Python</a> |
| <a class="dropdown-item" href="api/R/index.html">R</a> |
| <a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a> |
| <div class="dropdown-menu" aria-labelledby="navbarDeploying"> |
| <a class="dropdown-item" href="cluster-overview.html">Overview</a> |
| <a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a> |
| <div class="dropdown-divider"></div> |
| <a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a> |
| <a class="dropdown-item" href="running-on-mesos.html">Mesos</a> |
| <a class="dropdown-item" href="running-on-yarn.html">YARN</a> |
| <a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a> |
| </div> |
| </li> |
| |
| <li class="nav-item dropdown"> |
| <a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a> |
| <div class="dropdown-menu" aria-labelledby="navbarMore"> |
| <a class="dropdown-item" href="configuration.html">Configuration</a> |
| <a class="dropdown-item" href="monitoring.html">Monitoring</a> |
| <a class="dropdown-item" href="tuning.html">Tuning Guide</a> |
| <a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a> |
| <a class="dropdown-item" href="security.html">Security</a> |
| <a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a> |
| <a class="dropdown-item" href="migration-guide.html">Migration Guide</a> |
| <div class="dropdown-divider"></div> |
| <a class="dropdown-item" href="building-spark.html">Building Spark</a> |
| <a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a> |
| <a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a> |
| </div> |
| </li> |
| |
| <li class="nav-item"> |
| <input type="text" id="docsearch-input" placeholder="Search the docs…"> |
| </li> |
| </ul> |
| <!--<span class="navbar-text navbar-right"><span class="version-text">v3.5.0</span></span>--> |
| </div> |
| </nav> |
| |
| |
| |
| <div class="container"> |
| |
| |
| |
| <div class="left-menu-wrapper"> |
| <div class="left-menu"> |
| <h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3> |
| |
| <ul> |
| |
| <li> |
| <a href="sql-getting-started.html"> |
| |
| Getting Started |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-data-sources.html"> |
| |
| Data Sources |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-performance-tuning.html"> |
| |
| Performance Tuning |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-distributed-sql-engine.html"> |
| |
| Distributed SQL Engine |
| |
| </a> |
| </li> |
| |
| |
| |
| <ul> |
| |
| <li> |
| <a href="sql-distributed-sql-engine.html#running-the-thrift-jdbcodbc-server"> |
| |
| Running the Thrift JDBC/ODBC server |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-distributed-sql-engine.html#running-the-spark-sql-cli"> |
| |
| Running the Spark SQL CLI |
| |
| </a> |
| </li> |
| |
| |
| |
| </ul> |
| |
| |
| |
| <li> |
| <a href="sql-pyspark-pandas-with-arrow.html"> |
| |
| PySpark Usage Guide for Pandas with Apache Arrow |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-migration-guide.html"> |
| |
| Migration Guide |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-ref.html"> |
| |
| SQL Reference |
| |
| </a> |
| </li> |
| |
| |
| |
| <li> |
| <a href="sql-error-conditions.html"> |
| |
| Error Conditions |
| |
| </a> |
| </li> |
| |
| |
| |
| </ul> |
| |
| </div> |
| </div> |
| |
| <input id="nav-trigger" class="nav-trigger" checked type="checkbox"> |
| <label for="nav-trigger"></label> |
| <div class="content-with-sidebar mr-3" id="content"> |
| |
| <h1 class="title">Spark SQL CLI</h1> |
| |
| |
| <ul id="markdown-toc"> |
| <li><a href="#spark-sql-command-line-options" id="markdown-toc-spark-sql-command-line-options">Spark SQL Command Line Options</a></li> |
| <li><a href="#the-hiverc-file" id="markdown-toc-the-hiverc-file">The hiverc File</a></li> |
| <li><a href="#path-interpretation" id="markdown-toc-path-interpretation">Path interpretation</a></li> |
| <li><a href="#supported-comment-types" id="markdown-toc-supported-comment-types">Supported comment types</a></li> |
| <li><a href="#spark-sql-cli-interactive-shell-commands" id="markdown-toc-spark-sql-cli-interactive-shell-commands">Spark SQL CLI Interactive Shell Commands</a></li> |
| <li><a href="#examples" id="markdown-toc-examples">Examples</a></li> |
| </ul> |
| |
| <p>The Spark SQL CLI is a convenient interactive command tool to run the Hive metastore service and execute SQL |
| queries input from the command line. Note that the Spark SQL CLI cannot talk to the Thrift JDBC server.</p> |
| |
| <p>To start the Spark SQL CLI, run the following in the Spark directory:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql |
| </code></pre></div></div> |
| |
| <p>Configuration of Hive is done by placing your <code class="language-plaintext highlighter-rouge">hive-site.xml</code>, <code class="language-plaintext highlighter-rouge">core-site.xml</code> and <code class="language-plaintext highlighter-rouge">hdfs-site.xml</code> files in <code class="language-plaintext highlighter-rouge">conf/</code>.</p> |
| |
| <h2 id="spark-sql-command-line-options">Spark SQL Command Line Options</h2> |
| |
| <p>You may run <code class="language-plaintext highlighter-rouge">./bin/spark-sql --help</code> for a complete list of all available options.</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>CLI options: |
| -d,--define <key=value> Variable substitution to apply to Hive |
| commands. e.g. -d A=B or --define A=B |
| --database <databasename> Specify the database to use |
| -e <quoted-query-string> SQL from command line |
| -f <filename> SQL from files |
| -H,--help Print help information |
| --hiveconf <property=value> Use value for given property |
| --hivevar <key=value> Variable substitution to apply to Hive |
| commands. e.g. --hivevar A=B |
| -i <filename> Initialization SQL file |
| -S,--silent Silent mode in interactive shell |
| -v,--verbose Verbose mode (echo executed SQL to the |
| console) |
| </code></pre></div></div> |
| |
| <h2 id="the-hiverc-file">The hiverc File</h2> |
| |
| <p>When invoked without the <code class="language-plaintext highlighter-rouge">-i</code>, the Spark SQL CLI will attempt to load <code class="language-plaintext highlighter-rouge">$HIVE_HOME/bin/.hiverc</code> and <code class="language-plaintext highlighter-rouge">$HOME/.hiverc</code> as initialization files.</p> |
| |
| <h2 id="path-interpretation">Path interpretation</h2> |
| |
| <p>Spark SQL CLI supports running SQL from initialization script file(<code class="language-plaintext highlighter-rouge">-i</code>) or normal SQL file(<code class="language-plaintext highlighter-rouge">-f</code>), If path url don’t have a scheme component, the path will be handled as local file. |
| For example: <code class="language-plaintext highlighter-rouge">/path/to/spark-sql-cli.sql</code> equals to <code class="language-plaintext highlighter-rouge">file:///path/to/spark-sql-cli.sql</code>. User also can use Hadoop supported filesystems such as <code class="language-plaintext highlighter-rouge">s3://<mys3bucket>/path/to/spark-sql-cli.sql</code> or <code class="language-plaintext highlighter-rouge">hdfs://<namenode>:<port>/path/to/spark-sql-cli.sql</code>.</p> |
| |
| <h2 id="supported-comment-types">Supported comment types</h2> |
| |
| <table class="table table-striped"> |
| <thead><tr><th>Comment</th><th>Example</th></tr></thead> |
| <tr> |
| <td>simple comment</td> |
| <td> |
| <code> |
| -- This is a simple comment. |
| <br /> |
| SELECT 1; |
| </code> |
| </td> |
| </tr> |
| <tr> |
| <td>bracketed comment</td> |
| <td> |
| <code> |
| /* This is a bracketed comment. */ |
| <br /> |
| SELECT 1; |
| </code> |
| </td> |
| </tr> |
| <tr> |
| <td>nested bracketed comment</td> |
| <td> |
| <code> |
| /* This is a /* nested bracketed comment*/ .*/ |
| <br /> |
| SELECT 1; |
| </code> |
| </td> |
| </tr> |
| </table> |
| |
| <h2 id="spark-sql-cli-interactive-shell-commands">Spark SQL CLI Interactive Shell Commands</h2> |
| |
| <p>When <code class="language-plaintext highlighter-rouge">./bin/spark-sql</code> is run without either the <code class="language-plaintext highlighter-rouge">-e</code> or <code class="language-plaintext highlighter-rouge">-f</code> option, it enters interactive shell mode. |
| Use <code class="language-plaintext highlighter-rouge">;</code> (semicolon) to terminate commands. Notice:</p> |
| <ol> |
| <li>The CLI use <code class="language-plaintext highlighter-rouge">;</code> to terminate commands only when it’s at the end of line, and it’s not escaped by <code class="language-plaintext highlighter-rouge">\\;</code>.</li> |
| <li><code class="language-plaintext highlighter-rouge">;</code> is the only way to terminate commands. If the user types <code class="language-plaintext highlighter-rouge">SELECT 1</code> and presses enter, the console will just wait for input.</li> |
| <li>If the user types multiple commands in one line like <code class="language-plaintext highlighter-rouge">SELECT 1; SELECT 2;</code>, the commands <code class="language-plaintext highlighter-rouge">SELECT 1</code> and <code class="language-plaintext highlighter-rouge">SELECT 2</code> will be executed separately.</li> |
| <li>If <code class="language-plaintext highlighter-rouge">;</code> appears within a SQL statement (not the end of the line), then it has no special meanings: |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- This is a ; comment</span> |
| <span class="k">SELECT</span> <span class="s1">';'</span> <span class="k">as</span> <span class="n">a</span><span class="p">;</span> |
| </code></pre></div> </div> |
| <p>This is just a comment line followed by a SQL query which returns a string literal.</p> |
| <div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="cm">/* This is a comment contains ; |
| */</span> <span class="k">SELECT</span> <span class="mi">1</span><span class="p">;</span> |
| </code></pre></div> </div> |
| <p>However, if ‘;’ is the end of the line, it terminates the SQL statement. The example above will be terminated into <code class="language-plaintext highlighter-rouge">/* This is a comment contains </code> and <code class="language-plaintext highlighter-rouge">*/ SELECT 1</code>, Spark will submit these two commands separated and throw parser error (<code class="language-plaintext highlighter-rouge">unclosed bracketed comment</code> and <code class="language-plaintext highlighter-rouge">Syntax error at or near '*/'</code>).</p> |
| </li> |
| </ol> |
| |
| <table class="table table-striped"> |
| <thead><tr><th>Command</th><th>Description</th></tr></thead> |
| <tr> |
| <td><code>quit</code> or <code>exit</code></td> |
| <td>Exits the interactive shell.</td> |
| </tr> |
| <tr> |
| <td><code>!<command></code></td> |
| <td>Executes a shell command from the Spark SQL CLI shell.</td> |
| </tr> |
| <tr> |
| <td><code>dfs <HDFS dfs command></code></td> |
| <td>Executes a HDFS <a href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSCommands.html#dfs">dfs command</a> from the Spark SQL CLI shell.</td> |
| </tr> |
| <tr> |
| <td><code><query string></code></td> |
| <td>Executes a Spark SQL query and prints results to standard output.</td> |
| </tr> |
| <tr> |
| <td><code>source <filepath></code></td> |
| <td>Executes a script file inside the CLI.</td> |
| </tr> |
| </table> |
| |
| <h2 id="examples">Examples</h2> |
| |
| <p>Example of running a query from the command line:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -e 'SELECT COL FROM TBL' |
| </code></pre></div></div> |
| |
| <p>Example of setting Hive configuration variables:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -e 'SELECT COL FROM TBL' --hiveconf hive.exec.scratchdir=/home/my/hive_scratch |
| </code></pre></div></div> |
| |
| <p>Example of setting Hive configuration variables and using it in the SQL query:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -e 'SELECT ${hiveconf:aaa}' --hiveconf aaa=bbb --hiveconf hive.exec.scratchdir=/home/my/hive_scratch |
| spark-sql> SELECT ${aaa}; |
| bbb |
| </code></pre></div></div> |
| |
| <p>Example of setting Hive variables substitution:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql --hivevar aaa=bbb --define ccc=ddd |
| spark-sql> SELECT ${aaa}, ${ccc}; |
| bbb ddd |
| </code></pre></div></div> |
| |
| <p>Example of dumping data out from a query into a file using silent mode:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -S -e 'SELECT COL FROM TBL' > result.txt |
| </code></pre></div></div> |
| |
| <p>Example of running a script non-interactively:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -f /path/to/spark-sql-script.sql |
| </code></pre></div></div> |
| |
| <p>Example of running an initialization script before entering interactive mode:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql -i /path/to/spark-sql-init.sql |
| </code></pre></div></div> |
| |
| <p>Example of entering interactive mode:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql |
| spark-sql> SELECT 1; |
| 1 |
| spark-sql> -- This is a simple comment. |
| spark-sql> SELECT 1; |
| 1 |
| </code></pre></div></div> |
| |
| <p>Example of entering interactive mode with escape <code class="language-plaintext highlighter-rouge">;</code> in comment:</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>./bin/spark-sql |
| spark-sql>/* This is a comment contains \\; |
| > It won't be terminated by \\; */ |
| > SELECT 1; |
| 1 |
| </code></pre></div></div> |
| |
| |
| </div> |
| |
| <!-- /container --> |
| </div> |
| |
| <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" |
| integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" |
| crossorigin="anonymous"></script> |
| <script src="https://code.jquery.com/jquery.js"></script> |
| |
| <script src="js/vendor/anchor.min.js"></script> |
| <script src="js/main.js"></script> |
| |
| <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script> |
| <script type="text/javascript"> |
| // DocSearch is entirely free and automated. DocSearch is built in two parts: |
| // 1. a crawler which we run on our own infrastructure every 24 hours. It follows every link |
| // in your website and extract content from every page it traverses. It then pushes this |
| // content to an Algolia index. |
| // 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index |
| // to your search input and display its results in a dropdown UI. If you want to find more |
| // details on how works DocSearch, check the docs of DocSearch. |
| docsearch({ |
| apiKey: 'd62f962a82bc9abb53471cb7b89da35e', |
| appId: 'RAI69RXRSK', |
| indexName: 'apache_spark', |
| inputSelector: '#docsearch-input', |
| enhancedSearchInput: true, |
| algoliaOptions: { |
| 'facetFilters': ["version:3.5.0"] |
| }, |
| debug: false // Set debug to true if you want to inspect the dropdown |
| }); |
| |
| </script> |
| |
| <!-- MathJax Section --> |
| <script type="text/x-mathjax-config"> |
| MathJax.Hub.Config({ |
| TeX: { equationNumbers: { autoNumber: "AMS" } } |
| }); |
| </script> |
| <script> |
| // Note that we load MathJax this way to work with local file (file://), HTTP and HTTPS. |
| // We could use "//cdn.mathjax...", but that won't support "file://". |
| (function(d, script) { |
| script = d.createElement('script'); |
| script.type = 'text/javascript'; |
| script.async = true; |
| script.onload = function(){ |
| MathJax.Hub.Config({ |
| tex2jax: { |
| inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ], |
| displayMath: [ ["$$","$$"], ["\\[", "\\]"] ], |
| processEscapes: true, |
| skipTags: ['script', 'noscript', 'style', 'textarea', 'pre'] |
| } |
| }); |
| }; |
| script.src = ('https:' == document.location.protocol ? 'https://' : 'http://') + |
| 'cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' + |
| '?config=TeX-AMS-MML_HTMLorMML'; |
| d.getElementsByTagName('head')[0].appendChild(script); |
| }(document)); |
| </script> |
| </body> |
| </html> |