releases/1.1.0/docs/kudu_impala_integration.html - kudu-site - Git at Google

 <!DOCTYPE html>
 <html lang="en">
   <head>
     <meta charset="utf-8" />
     <meta http-equiv="X-UA-Compatible" content="IE=edge" />
     <meta name="viewport" content="width=device-width, initial-scale=1" />
     <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
     <meta name="description" content="A new open source Apache Hadoop ecosystem project, Apache Kudu completes Hadoop's storage layer to enable fast analytics on fast data" />
     <meta name="author" content="Cloudera" />
     <title>Apache Kudu - Using Apache Kudu with Apache Impala (incubating)</title>
     <!-- Bootstrap core CSS -->
     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css"
           integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7"
           crossorigin="anonymous">

     <!-- Custom styles for this template -->
     <link href="/css/kudu.css" rel="stylesheet"/>
     <link href="/css/asciidoc.css" rel="stylesheet"/>
     <link rel="shortcut icon" href="/img/logo-favicon.ico" />
     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.6.1/css/font-awesome.min.css" />


     <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
     <!--[if lt IE 9]>
         <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
         <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
         <![endif]-->
   </head>
   <body>
     <div class="kudu-site container-fluid">
       <!-- Static navbar -->
         <nav class="navbar navbar-default">
           <div class="container-fluid">
             <div class="navbar-header">
               <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
                 <span class="sr-only">Toggle navigation</span>
                 <span class="icon-bar"></span>
                 <span class="icon-bar"></span>
                 <span class="icon-bar"></span>
               </button>

               <a class="logo" href="/"><img
                 src="//d3dr9sfxru4sde.cloudfront.net/i/k/apachekudu_logo_0716_80px.png"
                 srcset="//d3dr9sfxru4sde.cloudfront.net/i/k/apachekudu_logo_0716_80px.png 1x, //d3dr9sfxru4sde.cloudfront.net/i/k/apachekudu_logo_0716_160px.png 2x"
                 alt="Apache Kudu"/></a>

             </div>
             <div id="navbar" class="collapse navbar-collapse">
               <ul class="nav navbar-nav navbar-right">
                 <li >
                   <a href="/">Home</a>
                 </li>
                 <li >
                   <a href="/overview.html">Overview</a>
                 </li>
                 <li class="active">
                   <a href="/docs/">Documentation</a>
                 </li>
                 <li >
                   <a href="/releases/">Releases</a>
                 </li>
                 <li >
                   <a href="/blog/">Blog</a>
                 </li>
                 <!-- NOTE: this dropdown menu does not appear on Mobile, so don't add anything here
                      that doesn't also appear elsewhere on the site. -->
                 <li class="dropdown">
                   <a href="/community.html" role="button" aria-haspopup="true" aria-expanded="false">Community <span class="caret"></span></a>
                   <ul class="dropdown-menu">
                     <li class="dropdown-header">GET IN TOUCH</li>
                     <li><a class="icon email" href="/community.html">Mailing Lists</a></li>
                     <li><a class="icon slack" href="https://getkudu-slack.herokuapp.com/">Slack Channel</a></li>
                     <li role="separator" class="divider"></li>
                     <li><a href="/community.html#meetups-user-groups-and-conference-presentations">Events and Meetups</a></li>
                     <li><a href="/committers.html">Project Committers</a></li>
                     <!--<li><a href="/roadmap.html">Roadmap</a></li>-->
                     <li><a href="/community.html#contributions">How to Contribute</a></li>
                     <li role="separator" class="divider"></li>
                     <li class="dropdown-header">DEVELOPER RESOURCES</li>
                     <li><a class="icon github" href="https://github.com/apache/incubator-kudu">GitHub</a></li>
                     <li><a class="icon gerrit" href="http://gerrit.cloudera.org:8080/#/q/status:open+project:kudu">Gerrit Code Review</a></li>
                     <li><a class="icon jira" href="https://issues.apache.org/jira/browse/KUDU">JIRA Issue Tracker</a></li>
                     <li role="separator" class="divider"></li>
                     <li class="dropdown-header">SOCIAL MEDIA</li>
                     <li><a class="icon twitter" href="https://twitter.com/ApacheKudu">Twitter</a></li>
                     <li><a href="https://www.reddit.com/r/kudu/">Reddit</a></li>
                     <li role="separator" class="divider"></li>
                     <li class="dropdown-header">APACHE SOFTWARE FOUNDATION</li>
                     <li><a href="https://www.apache.org/security/" target="_blank">Security</a></li>
                     <li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a></li>
                     <li><a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
                     <li><a href="https://www.apache.org/licenses/" target="_blank">License</a></li>
                   </ul>
                 </li>
                 <li >
                   <a href="/faq.html">FAQ</a>
                 </li>
               </ul><!-- /.nav -->
             </div><!-- /#navbar -->
           </div><!-- /.container-fluid -->
         </nav>

 <!--

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->


 <div class="container">
   <div class="row">
     <div class="col-md-9">

 <h1>Using Apache Kudu with Apache Impala (incubating)</h1>
       <div id="preamble">
 <div class="sectionbody">
 <div class="paragraph">
 <p>Kudu has tight integration with Impala, allowing you to use Impala
 to insert, query, update, and delete data from Kudu tablets using Impala&#8217;s SQL
 syntax, as an alternative to using the <a href="installation.html#view_api">Kudu APIs</a>
 to build a custom Kudu application. In addition, you can use JDBC or ODBC to connect
 existing or new applications written in any language, framework, or business intelligence
 tool to your Kudu data, using Impala as the broker.</p>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 The following instructions assume a
 <a href="http://www.cloudera.com/content/www/en-us/products/cloudera-manager.html">Cloudera Manager</a>
 deployment. However, you can use Kudu with Impala without Cloudera Manager.
 </td>
 </tr>
 </table>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="_requirements_and_implications"><a class="link" href="#_requirements_and_implications">Requirements and Implications</a></h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>This integration relies on features that released versions of Impala do not have yet.
 In the interim, you need
 to install a fork of Impala, which this document will refer to as <em>Impala_Kudu</em>.</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>You can install Impala_Kudu using parcels or packages.</p>
 </li>
 <li>
 <p>Kudu itself requires CDH 5.4.3 or later. To use Cloudera Manager with Impala_Kudu,
 you need Cloudera Manager 5.4.3 or later. Cloudera Manager 5.4.7 is recommended, as
 it adds support for collecting metrics from Kudu.</p>
 </li>
 <li>
 <p>If you have an existing Impala instance on your cluster, you can install Impala_Kudu
 alongside the existing Impala instance <strong>if you use parcels</strong>. The new instance does
 not share configurations with the existing instance and is completely independent.
 A script is provided to automate this type of installation. See <a href="#install_impala_kudu_parcels_side_by_side">Manual Installation</a>.</p>
 </li>
 <li>
 <p>It is especially important that the cluster has adequate
 unreserved RAM for the Impala_Kudu instance.</p>
 </li>
 <li>
 <p>Consider shutting down the original Impala service when testing Impala_Kudu if you
 want to be sure it is not impacted.</p>
 </li>
 <li>
 <p>Before installing Impala_Kudu, you must have already installed and configured
 services for HDFS (though it is not used by Kudu), the Hive Metastore (where Impala
 stores its metadata), and <a href="installation.html">Kudu</a>. You may need HBase, YARN,
 Sentry, and ZooKeeper services as well. Meeting the Impala installation requirements
 is out of the scope of this document. See
 <a href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/impala_prereqs.html">Impala Prerequisites</a>
 in the official Impala documentation for more information.</p>
 </li>
 </ul>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="_installing_impala_kudu_using_cloudera_manager"><a class="link" href="#_installing_impala_kudu_using_cloudera_manager">Installing Impala_Kudu Using Cloudera Manager</a></h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>If you use Cloudera Manager, you can install Impala_Kudu using
 <a href="#install_impala_kudu_parcels">parcels</a> or
 <a href="#install_impala_kudu_packages">packages</a>. However, if you have an existing Impala
 instance, you must use parcels and you should use the instructions provided in
 <a href="#install_impala_kudu_parcels_side_by_side">procedure</a>, rather than these instructions.</p>
 </div>
 <div class="sect2">
 <h3 id="install_impala_kudu_parcels"><a class="link" href="#install_impala_kudu_parcels">Installing the Impala_Kudu Service Using Parcels</a></h3>
 <div class="sect3">
 <h4 id="install_impala_kudu_parcels_side_by_side"><a class="link" href="#install_impala_kudu_parcels_side_by_side">Manual Installation</a></h4>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 Manual installation of Impala_Kudu is only supported where there is no other Impala
 service already running in the cluster, and when you use parcels.
 </td>
 </tr>
 </table>
 </div>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>Obtain the Impala_Kudu parcel either by using the parcel repository or downloading it manually.</p>
 <div class="ulist">
 <ul>
 <li>
 <p>To use the parcel repository:</p>
 <div class="ulist">
 <ul>
 <li>
 <p>Go to <strong>Hosts / Parcels</strong>.</p>
 </li>
 <li>
 <p>Click <strong>Edit Settings</strong>. Add <a href="http://archive.cloudera.com/beta/impala-kudu/parcels/latest/" class="bare">http://archive.cloudera.com/beta/impala-kudu/parcels/latest/</a>
 as a <strong>Remote Parcel Repository URL</strong>. Click <strong>Save Changes</strong>.</p>
 </li>
 </ul>
 </div>
 </li>
 <li>
 <p>To download the parcel manually:</p>
 <div class="ulist">
 <ul>
 <li>
 <p>Download the parcel for your operating system from
 <a href="http://archive.cloudera.com/beta/impala-kudu/parcels/latest/" class="bare">http://archive.cloudera.com/beta/impala-kudu/parcels/latest/</a> and upload
 it to <code>/opt/cloudera/parcel-repo/</code> on the Cloudera Manager server.</p>
 </li>
 <li>
 <p>Create a SHA1 file for the parcel. Cloudera Manager expects the SHA1 to be named
 with the exact same name as the parcel, with a <code>.sha</code> ending added, and to only
 contain the SHA1 itself, not the name of the parcel.</p>
 <div class="listingblock">
 <div class="content">
 <pre>sha1sum &lt;name_of_parcel_file&gt; | awk {'print $1'} &gt; &lt;name_of_parcel_file&gt;.sha</pre>
 </div>
 </div>
 </li>
 </ul>
 </div>
 </li>
 </ul>
 </div>
 </li>
 <li>
 <p>Go to <strong>Hosts / Parcels</strong>. Click <strong>Check for New Parcels.</strong> Verify that <strong>Impala_Kudu</strong>
 is in the list.</p>
 </li>
 <li>
 <p>Download (if necessary), distribute, and activate the <strong>Impala_Kudu</strong> parcel.</p>
 </li>
 <li>
 <p>Add a new Impala service. This service will use the Impala_Kudu parcel.</p>
 <div class="ulist">
 <ul>
 <li>
 <p>Go to the cluster and click <strong>Actions / Add a Service</strong>.</p>
 </li>
 <li>
 <p>Choose one host to run the Catalog Server, one to run the Statestore, and one
 or more to run Impala Daemon instances. Click <strong>Continue</strong>.</p>
 </li>
 <li>
 <p>Choose one or more Impala scratch directories. Click <strong>Continue</strong>. The Impala service
 starts. <strong>However, the features that Impala needs in order to work with Kudu are not
 enabled yet.</strong></p>
 </li>
 </ul>
 </div>
 </li>
 <li>
 <p>Enable the features that allow Impala to work with Kudu.</p>
 <div class="ulist">
 <ul>
 <li>
 <p>Go to the new Impala service. Click <strong>Configuration</strong>.</p>
 </li>
 <li>
 <p>Search for the <strong>Impala Service Environment Advanced Configuration Snippet (Safety
 Valve)</strong> configuration item. Add the following to the text field and save your changes:
 <code>IMPALA_KUDU=1</code></p>
 </li>
 <li>
 <p>Restart the Impala service.</p>
 </li>
 <li>
 <p>You can verify that the Kudu features are available to Impala by running the following
 query in Impala Shell:</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">select if(version() like '%KUDU%', "all set to go!", "check your configs") as s;

 Query: select if(version() like '%KUDU%', "all set to go!", "check your configs") as s
 +----------------+
 | s              |
 +----------------+
 | all set to go! |
 +----------------+
 Fetched 1 row(s) in 0.02s</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>If you do not see 'all set to go!', carefully review the previous instructions to be sure
 that you have not missed a step.</p>
 </div>
 </li>
 </ul>
 </div>
 </li>
 </ol>
 </div>
 </div>
 <div class="sect3">
 <h4 id="_installation_using_the_code_deploy_py_code_script"><a class="link" href="#_installation_using_the_code_deploy_py_code_script">Installation using the <code>deploy.py</code> Script</a></h4>
 <div class="paragraph">
 <p>If you use parcels, Cloudera recommends using the included <code>deploy.py</code> script to
 install and deploy the Impala_Kudu service into your cluster. If your cluster does
 not have an existing Impala instance, the script is optional. However, if you do
 have an existing Impala instance and want to install Impala_Kudu side-by-side,
 you must use the script.</p>
 </div>
 <div class="ulist">
 <div class="title">Prerequisites</div>
 <ul>
 <li>
 <p>The script depends upon the Cloudera Manager API Python bindings. Install the bindings
 using <code>sudo pip install cm-api</code> (or as an unprivileged user, with the <code>--user</code>
 option to <code>pip</code>), or see <a href="http://cloudera.github.io/cm_api/docs/python-client/" class="bare">http://cloudera.github.io/cm_api/docs/python-client/</a>
 for more details.</p>
 </li>
 <li>
 <p>You need the following information to run the script:</p>
 <div class="ulist">
 <ul>
 <li>
 <p>The IP address or fully-qualified domain name of the Cloudera Manager server.</p>
 </li>
 <li>
 <p>The IP address or fully-qualified domain name of the host that should run the Kudu
 master process, if different from the Cloudera Manager server.</p>
 </li>
 <li>
 <p>The cluster name, if Cloudera Manager manages multiple clusters.</p>
 </li>
 <li>
 <p>If you have an existing Impala service and want to clone its configuration, you
 need to know the name of the existing service.</p>
 </li>
 <li>
 <p>If your cluster has more than one instance of an HDFS, Hive, HBase, or other CDH
 service that this Impala_Kudu service depends upon, the name of the service this new
 Impala_Kudu service should use.</p>
 </li>
 <li>
 <p>A name for the new Impala service.</p>
 </li>
 <li>
 <p>A user name and password with <strong>Full Administrator</strong> privileges in Cloudera Manager.</p>
 </li>
 <li>
 <p>The IP address or host name of the host where the new Impala_Kudu service&#8217;s master role
 should be deployed, if not the Cloudera Manager server.</p>
 </li>
 <li>
 <p>A comma-separated list of local (not HDFS) scratch directories which the new
 Impala_Kudu service should use, if you are not cloning an existing Impala service.</p>
 </li>
 </ul>
 </div>
 </li>
 <li>
 <p>Your Cloudera Manager server needs network access to reach the parcel repository
 hosted on <code>cloudera.com</code>.</p>
 </li>
 </ul>
 </div>
 <div class="ulist">
 <div class="title">Procedure</div>
 <ul>
 <li>
 <p>Download the <code>deploy.py</code> from <a href="https://github.com/apache/incubator-impala/blob/master/infra/deploy/deploy.py" class="bare">https://github.com/apache/incubator-impala/blob/master/infra/deploy/deploy.py</a>
 using <code>curl</code> or another utility of your choice.</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-bash" data-lang="bash">$ curl -O https://raw.githubusercontent.com/apache/incubator-impala/master/infra/deploy/deploy.py</code></pre>
 </div>
 </div>
 </li>
 <li>
 <p>Run the <code>deploy.py</code> script. The syntax below creates a standalone IMPALA_KUDU
 service called <code>IMPALA_KUDU-1</code> on a cluster called <code>Cluster 1</code>. Exactly one HDFS, Hive,
 and HBase service exist in Cluster 1, so service dependencies are not required.
 The cluster should not already have an Impala instance.</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-bash" data-lang="bash">$ python deploy.py create IMPALA_KUDU-1 --cluster 'Cluster 1' \
   --master_host &lt;FQDN_of_Kudu_master_server&gt; \
   --host &lt;FQDN_of_cloudera_manager_server&gt;</code></pre>
 </div>
 </div>
 </li>
 </ul>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 If you do not specify <code>--master_host</code>, the Kudu master is configured to run
 on the Cloudera Manager server (the value specified by the <code>--host</code> parameter).
 </td>
 </tr>
 </table>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>If two HDFS services are available, called <code>HDFS-1</code> and <code>HDFS-2</code>, use the following
 syntax to create the same <code>IMPALA_KUDU-1</code> service using <code>HDFS-2</code>. You can specify
 multiple types of dependencies; use the <code>deploy.py create -h</code> command for details.</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-bash" data-lang="bash">$ python deploy.py create IMPALA_KUDU-1 --cluster 'Cluster 1' --hdfs_dependency HDFS-2 \
   --host &lt;FQDN_of_cloudera_manager_server&gt;</code></pre>
 </div>
 </div>
 </li>
 <li>
 <p>Run the <code>deploy.py</code> script with the following syntax to clone an existing IMPALA
 service called <code>IMPALA-1</code> to a new IMPALA_KUDU service called <code>IMPALA_KUDU-1</code>, where
 Cloudera Manager only manages a single cluster.  This new <code>IMPALA_KUDU-1</code> service
 can run side by side with the <code>IMPALA-1</code> service if there is sufficient RAM for both.
 <code>IMPALA_KUDU-1</code> should be given at least 16 GB of RAM and possibly more depending
 on the complexity of the workload and the query concurrency level.</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-bash" data-lang="bash">$ python deploy.py clone IMPALA_KUDU-1 IMPALA-1 --host &lt;FQDN_of_cloudera_manager_server&gt;</code></pre>
 </div>
 </div>
 </li>
 <li>
 <p>Additional parameters are available for <code>deploy.py</code>. To view them, use the <code>-h</code>
 argument.  You can also use commands such as <code>deploy.py create -h</code> or
 <code>deploy.py clone -h</code> to get information about additional arguments for individual operations.</p>
 </li>
 <li>
 <p>The service is created <strong>but not started</strong>. Review the configuration in Cloudera Manager
 and start the service.</p>
 </li>
 </ul>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="install_impala_kudu_packages"><a class="link" href="#install_impala_kudu_packages">Installing Impala_Kudu Using Packages</a></h3>
 <div class="paragraph">
 <p>Before installing Impala_Kudu packages, you need to uninstall any existing Impala
 packages, using operating system utilities. For this reason, you cannot use Impala_Kudu
 alongside another Impala instance if you use packages.</p>
 </div>
 <table id="impala_kudu_package_locations" class="tableblock frame-all grid-all spread">
 <caption class="title">Table 1. Impala_Kudu Package Locations</caption>
 <colgroup>
 <col style="width: 33%;">
 <col style="width: 33%;">
 <col style="width: 33%;">
 </colgroup>
 <thead>
 <tr>
 <th class="tableblock halign-right valign-top">OS</th>
 <th class="tableblock halign-left valign-top">Repository</th>
 <th class="tableblock halign-left valign-top">Individual Packages</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td class="tableblock halign-right valign-top"><p class="tableblock"><strong>RHEL or CentOS</strong></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/redhat/6/x86_64/impala-kudu/cloudera-impala-kudu.repo">RHEL 6 or CentOS 6</a>,
                    <a href="http://archive.cloudera.com/beta/impala-kudu/redhat/7/x86_64/impala-kudu/cloudera-impala-kudu.repo">RHEL 7 or CentOS 7</a></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/redhat/6/x86_64/impala-kudu/0/RPMS/x86_64/">RHEL 6 or CentOS 6</a>,
                    <a href="http://archive.cloudera.com/beta/impala-kudu/redhat/7/x86_64/impala-kudu/0/RPMS/x86_64/">RHEL 7 or CentOS 7</a></p></td>
 </tr>
 <tr>
 <td class="tableblock halign-right valign-top"><p class="tableblock"><strong>Ubuntu</strong></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/ubuntu/trusty/amd64/impala-kudu/cloudera.list">Trusty</a></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/ubuntu/trusty/amd64/impala-kudu/pool/contrib/i/impala-kudu/">Trusty</a></p></td>
 </tr>
 </tbody>
 </table>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>Download and configure the Impala_Kudu repositories for your operating system, or manually
 download individual RPMs, using the appropriate link from <a href="#impala_kudu_package_locations">Impala_Kudu Package Locations</a>.</p>
 </li>
 <li>
 <p>An Impala cluster has at least one <code>impala-kudu-server</code> and at most one <code>impala-kudu-catalog</code>
 and <code>impala-kudu-state-store</code>.  To connect to Impala from the command line, install
 the <code>impala-kudu-shell</code> package.</p>
 </li>
 </ol>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_adding_impala_service_in_cloudera_manager"><a class="link" href="#_adding_impala_service_in_cloudera_manager">Adding Impala service in Cloudera Manager</a></h3>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>Add a new Impala service in Cloudera Manager.</p>
 <div class="ulist">
 <ul>
 <li>
 <p>Go to the cluster and click <strong>Actions / Add a Service</strong>.</p>
 </li>
 <li>
 <p>Choose one host to run the Catalog Server, one to run the Statestore, and at
 least three to run Impala Daemon instances. Click <strong>Continue</strong>.</p>
 </li>
 <li>
 <p>Choose one or more Impala scratch directories. Click <strong>Continue</strong>.</p>
 </li>
 </ul>
 </div>
 </li>
 <li>
 <p>The Impala service starts.</p>
 </li>
 </ol>
 </div>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="_installing_impala_kudu_without_cloudera_manager"><a class="link" href="#_installing_impala_kudu_without_cloudera_manager">Installing Impala_Kudu Without Cloudera Manager</a></h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>Before installing Impala_Kudu packages, you need to uninstall any existing Impala
 packages, using operating system utilities. For this reason, you cannot use Impala_Kudu
 alongside another Impala instance if you use packages.</p>
 </div>
 <div class="admonitionblock important">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-important" title="Important"></i>
 </td>
 <td class="content">
 Do not use these command-line instructions if you use Cloudera Manager.
 Instead, follow <a href="#install_impala_kudu_packages">Installing Impala_Kudu Using Packages</a>.
 </td>
 </tr>
 </table>
 </div>
 <table id="impala_kudu_non-cm_locations" class="tableblock frame-all grid-all spread">
 <caption class="title">Table 2. Impala_Kudu Package Locations</caption>
 <colgroup>
 <col style="width: 33%;">
 <col style="width: 33%;">
 <col style="width: 33%;">
 </colgroup>
 <thead>
 <tr>
 <th class="tableblock halign-right valign-top">OS</th>
 <th class="tableblock halign-left valign-top">Repository</th>
 <th class="tableblock halign-left valign-top">Individual Packages</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td class="tableblock halign-right valign-top"><p class="tableblock"><strong>RHEL or CentOS</strong></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/redhat/6/x86_64/impala-kudu/cloudera-impala-kudu.repo">RHEL 6 or CentOS 6</a>,
                    <a href="http://archive.cloudera.com/beta/impala-kudu/redhat/7/x86_64/impala-kudu/cloudera-impala-kudu.repo">RHEL 7 or CentOS 7</a></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/redhat/6/x86_64/impala-kudu/0/RPMS/x86_64/">RHEL 6 or CentOS 6</a>,
                    <a href="http://archive.cloudera.com/beta/impala-kudu/redhat/7/x86_64/impala-kudu/0/RPMS/x86_64/">RHEL 7 or CentOS 7</a></p></td>
 </tr>
 <tr>
 <td class="tableblock halign-right valign-top"><p class="tableblock"><strong>Ubuntu</strong></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/ubuntu/trusty/amd64/impala-kudu/cloudera.list">Trusty</a></p></td>
 <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="http://archive.cloudera.com/beta/impala-kudu/ubuntu/trusty/amd64/impala-kudu/pool/contrib/i/impala-kudu/">Trusty</a></p></td>
 </tr>
 </tbody>
 </table>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>Download and configure the Impala_Kudu repositories for your operating system, or manually
 download individual RPMs, using the appropriate link from <a href="#impala_kudu_non-cm_locations">Impala_Kudu Package Locations</a>.</p>
 </li>
 <li>
 <p>An Impala cluster has at least one <code>impala-kudu-server</code> and at most one <code>impala-kudu-catalog</code>
 and <code>impala-kudu-state-store</code>.  To connect to Impala from the command line, install
 the <code>impala-kudu-shell</code> package.</p>
 </li>
 </ol>
 </div>
 <div class="sect2">
 <h3 id="_starting_impala_kudu_services"><a class="link" href="#_starting_impala_kudu_services">Starting Impala_Kudu Services</a></h3>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>Use the Impala start-up scripts to start each service on the relevant hosts:</p>
 <div class="listingblock">
 <div class="content">
 <pre>$ sudo service impala-state-store start

 $ sudo service impala-catalog start

 $ sudo service impala-server start</pre>
 </div>
 </div>
 </li>
 </ol>
 </div>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="_using_the_impala_shell"><a class="link" href="#_using_the_impala_shell">Using the Impala Shell</a></h2>
 <div class="sectionbody">
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 This is only a small subset of Impala Shell functionality. For more details, see the
 <a href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/impala_impala_shell.html">Impala Shell</a> documentation.
 </td>
 </tr>
 </table>
 </div>
 <div class="paragraph">
 <p>Neither Kudu nor Impala needs special configuration in order for you to use the Impala
 Shell or the Impala API to insert, update, delete, or query Kudu data using Impala.
 However, you do need to create a mapping between the Impala and Kudu tables. Kudu
 provides the Impala query to map to an existing Kudu table in the web UI.</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Be sure you are using the <code>impala-shell</code> binary provided by the Impala_Kudu package,
 rather than the default CDH Impala binary. The following shows how to verify this
 using the <code>alternatives</code> command on a RHEL or CentOS host.</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-bash" data-lang="bash">$ sudo alternatives --display impala-shell

 impala-shell - status is auto.
  link currently points to /opt/cloudera/parcels/CDH-5.5.0-1.cdh5.5.0.p0.1007/bin/impala-shell
 /opt/cloudera/parcels/CDH-5.5.0-1.cdh5.5.0.p0.1007/bin/impala-shell - priority 10
 /opt/cloudera/parcels/IMPALA_KUDU-2.3.0-1.cdh5.5.0.p0.119/bin/impala-shell - priority 5
 Current `best' version is /opt/cloudera/parcels/CDH-5.5.0-1.cdh5.5.0.p0.1007/bin/impala-shell.

 $ sudo alternatives --set impala-shell /opt/cloudera/parcels/IMPALA_KUDU-2.3.0-1.cdh5.5.0.p0.119/bin/impala-shell</code></pre>
 </div>
 </div>
 </li>
 <li>
 <p>Start Impala Shell using the <code>impala-shell</code> command. By default, <code>impala-shell</code>
 attempts to connect to the Impala daemon on <code>localhost</code> on port 21000. To connect
 to a different host, use the <code>-i &lt;host:port&gt;</code> option. To automatically connect to
 a specific Impala database, use the <code>-d &lt;database&gt;</code> option. For instance, if all your
 Kudu tables are in Impala in the database <code>impala_kudu</code>, use <code>-d impala_kudu</code> to use
 this database.</p>
 </li>
 <li>
 <p>To quit the Impala Shell, use the following command: <code>quit;</code></p>
 </li>
 </ul>
 </div>
 <div class="sect2">
 <h3 id="_internal_and_external_impala_tables"><a class="link" href="#_internal_and_external_impala_tables">Internal and External Impala Tables</a></h3>
 <div class="paragraph">
 <p>When creating a new Kudu table using Impala, you can create the table as an internal
 table or an external table.</p>
 </div>
 <div class="dlist">
 <dl>
 <dt class="hdlist1">Internal</dt>
 <dd>
 <p>An internal table is managed by Impala, and when you drop it from Impala,
 the data and the table truly are dropped. When you create a new table using Impala,
 it is generally an internal table.</p>
 </dd>
 <dt class="hdlist1">External</dt>
 <dd>
 <p>An external table (created by <code>CREATE EXTERNAL TABLE</code>) is not managed by
 Impala, and dropping such a table does not drop the table from its source location
 (here, Kudu). Instead, it only removes the mapping between Impala and Kudu. This is
 the mode used in the syntax provided by Kudu for mapping an existing table to Impala.</p>
 </dd>
 </dl>
 </div>
 <div class="paragraph">
 <p>See the
 <a href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/impala_tables.html">Impala documentation</a>
 for more information about internal and external tables.</p>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_querying_an_existing_kudu_table_in_impala"><a class="link" href="#_querying_an_existing_kudu_table_in_impala">Querying an Existing Kudu Table In Impala</a></h3>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>Go to <a href="http://kudu-master.example.com:8051/tables/" class="bare">http://kudu-master.example.com:8051/tables/</a>, where <em>kudu-master.example.com</em>
 is the address of your Kudu master.</p>
 </li>
 <li>
 <p>Click the table ID for the relevant table.</p>
 </li>
 <li>
 <p>Scroll to the bottom of the page, or search for <code>Impala CREATE TABLE statement</code>.
 Copy the entire statement.</p>
 </li>
 <li>
 <p>Paste the statement into Impala. Impala now has a mapping to your Kudu table.</p>
 </li>
 </ol>
 </div>
 </div>
 <div class="sect2">
 <h3 id="kudu_impala_create_table"><a class="link" href="#kudu_impala_create_table">Creating a New Kudu Table From Impala</a></h3>
 <div class="paragraph">
 <p>Creating a new table in Kudu from Impala is similar to mapping an existing Kudu table
 to an Impala table, except that you need to write the <code>CREATE</code> statement yourself.
 Use the following example as a guideline. Impala first creates the table, then creates
 the mapping.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE my_first_table
 (
   id BIGINT,
   name STRING
 )
 DISTRIBUTE BY HASH INTO 16 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'my_first_table',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'id'
 );</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>In the <code>CREATE TABLE</code> statement, the columns that comprise the primary key must
 be listed first. Additionally, primary key columns are implicitly marked <code>NOT NULL</code>.</p>
 </div>
 <div class="paragraph">
 <p>The following table properties are required, and the <code>kudu.key_columns</code> property must
 contain at least one column.</p>
 </div>
 <div class="dlist">
 <dl>
 <dt class="hdlist1"><code>storage_handler</code></dt>
 <dd>
 <p>the mechanism used by Impala to determine the type of data source.
 For Kudu tables, this must be <code>com.cloudera.kudu.hive.KuduStorageHandler</code>.</p>
 </dd>
 <dt class="hdlist1"><code>kudu.table_name</code></dt>
 <dd>
 <p>the name of the table that Impala will create (or map to) in Kudu.</p>
 </dd>
 <dt class="hdlist1"><code>kudu.master_addresses</code></dt>
 <dd>
 <p>the list of Kudu masters Impala should communicate with.</p>
 </dd>
 <dt class="hdlist1"><code>kudu.key_columns</code></dt>
 <dd>
 <p>the comma-separated list of primary key columns, whose contents
 should not be nullable.</p>
 </dd>
 </dl>
 </div>
 <div class="paragraph">
 <p>When creating a new Kudu table, you are required to specify a distribution scheme.
 See <a href="#partitioning_tables">Partitioning Tables</a>. The table creation example above is distributed into
 16 buckets by hashing the <code>id</code> column, for simplicity. See
 <a href="#partitioning_rules_of_thumb">Partitioning Rules of Thumb</a> for guidelines on partitioning.</p>
 </div>
 <div class="sect3">
 <h4 id="__code_create_table_as_select_code"><a class="link" href="#__code_create_table_as_select_code"><code>CREATE TABLE AS SELECT</code></a></h4>
 <div class="paragraph">
 <p>You can create a table by querying any other table or tables in Impala, using a <code>CREATE
 TABLE &#8230;&#8203; AS SELECT</code> statement. The following example imports all rows from an existing table
 <code>old_table</code> into a Kudu table <code>new_table</code>. The columns in <code>new_table</code> will have the
 same names and types as the columns in <code>old_table</code>, but you need to populate the <code>kudu.key_columns</code>
 property. In this example, the primary key columns are <code>ts</code> and <code>name</code>.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE new_table
 DISTRIBUTE BY HASH INTO 16 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'new_table',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'ts, name'
 )
 AS SELECT * FROM old_table;</code></pre>
 </div>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 <div class="paragraph">
 <p>For <code>CREATE TABLE &#8230;&#8203; AS SELECT</code> we currently require that the first columns that are
 projected in the <code>SELECT</code> statement correspond to the Kudu table keys and are in the
 same order (<code>ts</code> then <code>name</code> in the example above). If the default projection generated by <code>*</code>
 does not meet this requirement, the user should avoid using <code>*</code> and explicitly mention
 the columns to project, in the correct order.</p>
 </div>
 </td>
 </tr>
 </table>
 </div>
 <div class="paragraph">
 <p>You can refine the <code>SELECT</code> statement to only match the rows and columns you want
 to be inserted into the new table. You can also rename the columns by using syntax
 like <code>SELECT name as new_name</code>.</p>
 </div>
 </div>
 <div class="sect3">
 <h4 id="_pre_splitting_tables"><a class="link" href="#_pre_splitting_tables">Pre-Splitting Tables</a></h4>
 <div class="paragraph">
 <p>Tables are divided into tablets which are each served by one or more tablet
 servers. Ideally, tablets should split a table&#8217;s data relatively equally. Kudu currently
 has no mechanism for automatically (or manually) splitting a pre-existing tablet.
 Until this feature has been implemented, <strong>you must pre-split your table when you create
 it</strong>. When designing your table schema, consider primary keys that will allow you to
 pre-split your table into tablets which grow at similar rates. You can provide split
 points using a <code>DISTRIBUTE BY</code> clause when creating a table using Impala:</p>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 Impala keywords, such as <code>group</code>, are enclosed by back-tick characters when
 they are not used in their keyword sense.
 </td>
 </tr>
 </table>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE cust_behavior (
   _id BIGINT,
   salary STRING,
   edu_level INT,
   usergender STRING,
   `group` STRING,
   city STRING,
   postcode STRING,
   last_purchase_price FLOAT,
   last_purchase_date BIGINT,
   category STRING,
   sku STRING,
   rating INT,
   fulfilled_date BIGINT
 )
 DISTRIBUTE BY RANGE (_id)
   SPLIT ROWS((1439560049342),
              (1439566253755),
              (1439572458168),
              (1439578662581),
              (1439584866994),
              (1439591071407))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',
   'kudu.master_addresses' = 'a1216.halxg.cloudera.com:7051',
   'kudu.key_columns' = '_id',
   'kudu.num_tablet_replicas' = '3'
 );</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>If you have multiple primary key columns, you can specify split points by separating
 them with commas within the inner brackets: <code>(('va',1), ('ab',2))</code>. The expression
 must be valid JSON.</p>
 </div>
 </div>
 <div class="sect3">
 <h4 id="_impala_databases_and_kudu"><a class="link" href="#_impala_databases_and_kudu">Impala Databases and Kudu</a></h4>
 <div class="paragraph">
 <p>Impala uses a database containment model. In Impala, you can create a table within a specific
 scope, referred to as a <em>database</em>. To create the database, use a <code>CREATE DATABASE</code>
 statement. To use the database for further Impala operations such as <code>CREATE TABLE</code>,
 use the <code>USE</code> statement. For example, to create a table in a database called <code>impala_kudu</code>,
 use the following statements:</p>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 Impala uses a namespace mechanism to allow for tables to be created within different
 scopes, called <code>databases</code>. To create a database, use a <code>CREATE DATABASE</code>
 statement. To use the database for further Impala operations such as <code>CREATE TABLE</code>,
 use the <code>USE</code> statement. For example, to create a table in a database called <code>impala_kudu</code>,
 use the following SQL:
 </td>
 </tr>
 </table>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE DATABASE impala_kudu;
 USE impala_kudu;
 CREATE TABLE my_first_table (
   id BIGINT,
   name STRING
 )
 DISTRIBUTE BY HASH INTO 4 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'my_first_table',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'id'
 );</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>The <code>my_first_table</code> table is created within the <code>impala_kudu</code> database. To refer
 to this database in the future, without using a specific <code>USE</code> statement, you can
 refer to the table using <code>&lt;database&gt;.&lt;table&gt;</code> syntax. For example, to specify the
 <code>my_first_table</code> table in database <code>impala_kudu</code>, as opposed to any other table with
 the same name in another database, use <code>impala_kudu.my_first_table</code>. This also applies
 to <code>INSERT</code>, <code>UPDATE</code>, <code>DELETE</code>, and <code>DROP</code> statements.</p>
 </div>
 <div class="admonitionblock warning">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-warning" title="Warning"></i>
 </td>
 <td class="content">
 Currently, Kudu does not encode the Impala database into the table name
 in any way. This means that even though you can create Kudu tables within Impala databases,
 the actual Kudu tables need to be unique within Kudu. For example, if you create <code>database_1.my_kudu_table</code>
 and <code>database_2.my_kudu_table</code>, you will have a naming collision within Kudu, even
 though this would not cause a problem in Impala. This can be resolved by specifying
 a unique Kudu table name in the <code>kudu.table_name</code> property.
 </td>
 </tr>
 </table>
 </div>
 </div>
 <div class="sect3">
 <h4 id="_impala_keywords_not_supported_for_kudu_tables"><a class="link" href="#_impala_keywords_not_supported_for_kudu_tables">Impala Keywords Not Supported for Kudu Tables</a></h4>
 <div class="paragraph">
 <p>The following Impala keywords are not supported when creating Kudu tables:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p><code>PARTITIONED</code></p>
 </li>
 <li>
 <p><code>STORED AS</code></p>
 </li>
 <li>
 <p><code>LOCATION</code></p>
 </li>
 <li>
 <p><code>ROWFORMAT</code></p>
 </li>
 </ul>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_optimizing_performance_for_evaluating_sql_predicates"><a class="link" href="#_optimizing_performance_for_evaluating_sql_predicates">Optimizing Performance for Evaluating SQL Predicates</a></h3>
 <div class="paragraph">
 <p>If the <code>WHERE</code> clause of your query includes comparisons with the operators
 <code>=</code>, <code>&lt;=</code>, or <code>&gt;=</code>, Kudu evaluates the condition directly and only returns the
 relevant results. This provides optimum performance, because Kudu only returns the
 relevant results to Impala. For predicates <code>&lt;</code>, <code>&gt;</code>, <code>!=</code>, or any other predicate
 type supported by Impala, Kudu does not evaluate the predicates directly, but returns
 all results to Impala and relies on Impala to evaluate the remaining predicates and
 filter the results accordingly. This may cause differences in performance, depending
 on the delta of the result set before and after evaluating the <code>WHERE</code> clause.</p>
 </div>
 <div class="paragraph">
 <p>In the <code>CREATE TABLE</code> statement, the first column must be the primary key. Additionally,
 the primary key can never be NULL when inserting or updating a row.</p>
 </div>
 <div class="paragraph">
 <p>All properties in the <code>TBLPROPERTIES</code> statement are required, and the <code>kudu.key_columns</code>
 must contain at least one column.</p>
 </div>
 </div>
 <div class="sect2">
 <h3 id="partitioning_tables"><a class="link" href="#partitioning_tables">Partitioning Tables</a></h3>
 <div class="paragraph">
 <p>Tables are partitioned into tablets according to a partition schema on the primary
 key columns. Each tablet is served by at least one tablet server. Ideally, a table
 should be split into tablets that are distributed across a number of tablet servers
 to maximize parallel operations. The details of the partitioning schema you use
 will depend entirely on the type of data you store and how you access it. For a full
 discussion of schema design in Kudu, see <a href="schema_design.html">Schema Design</a>.</p>
 </div>
 <div class="paragraph">
 <p>Kudu currently has no mechanism for splitting or merging tablets after the table has
 been created. Until this feature has been implemented, you must provide a partition
 schema for your table when you create it. When designing your tables, consider using
 primary keys that will allow you to partition your table into tablets which grow
 at similar rates.</p>
 </div>
 <div class="paragraph">
 <p>You can partition your table using Impala&#8217;s <code>DISTRIBUTE BY</code> keyword, which
 supports distribution by <code>RANGE</code> or <code>HASH</code>. The partition scheme can contain zero
 or more <code>HASH</code> definitions, followed by an optional <code>RANGE</code> definition. The <code>RANGE</code>
 definition can refer to one or more primary key columns.
 Examples of <a href="#basic_partitioning">basic</a> and <a href="#advanced_partitioning">advanced</a>
 partitioning are shown below.</p>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 Impala keywords, such as <code>group</code>, are enclosed by back-tick characters when
 they are used as identifiers, rather than as keywords.
 </td>
 </tr>
 </table>
 </div>
 <div class="sect3">
 <h4 id="basic_partitioning"><a class="link" href="#basic_partitioning">Basic Partitioning</a></h4>
 <div class="paragraph">
 <div class="title"><code>DISTRIBUTE BY RANGE</code></div>
 <p>You can specify split rows for one or more primary key columns that contain integer
 or string values. Range partitioning in Kudu allows splitting a table based
 on the lexicographic order of its primary keys. This allows you to balance parallelism
 in writes with scan efficiency.</p>
 </div>
 <div class="paragraph">
 <p>The split row does not need to exist. It defines an exclusive bound in the form of:
 <code>(START_KEY, SplitRow), [SplitRow, STOP_KEY)</code>. In other words, the split row, if
 it exists, is included in the tablet after the split point. For instance, if you
 specify a split row <code>abc</code>, a row <code>abca</code> would be in the second tablet, while a row
 <code>abb</code> would be in the first.</p>
 </div>
 <div class="paragraph">
 <p>Suppose you have a table that has columns <code>state</code>, <code>name</code>, and <code>purchase_count</code>. The
 following example creates 50 tablets, one per US state.</p>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 <div class="title">Monotonically Increasing Values</div>
 <div class="paragraph">
 <p>If you partition by range on a column whose values are monotonically increasing,
 the last tablet will grow much larger than the others. Additionally, all data
 being inserted will be written to a single tablet at a time, limiting the scalability
 of data ingest. In that case, consider distributing by <code>HASH</code> instead of, or in
 addition to, <code>RANGE</code>.</p>
 </div>
 </td>
 </tr>
 </table>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE customers (
   state STRING,
   name STRING,
   purchase_count int32,
 )
 DISTRIBUTE BY RANGE (state)
   SPLIT ROWS (('al'),
               ('ak'),
               ('ar'),
               ...
               ('wv'),
               ('wy'))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'customers',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'state, name'
 );</code></pre>
 </div>
 </div>
 <div id="distribute_by_hash" class="paragraph">
 <div class="title"><code>DISTRIBUTE BY HASH</code></div>
 <p>Instead of distributing by an explicit range, or in combination with range distribution,
 you can distribute into a specific number of 'buckets' by hash. You specify the primary
 key columns you want to partition by, and the number of buckets you want to use. Rows are
 distributed by hashing the specified key columns. Assuming that the values being
 hashed do not themselves exhibit significant skew, this will serve to distribute
 the data evenly across buckets.</p>
 </div>
 <div class="paragraph">
 <p>You can specify multiple definitions, and you can specify definitions which
 use compound primary keys. However, one column cannot be mentioned in multiple hash
 definitions. Consider two columns, <code>a</code> and <code>b</code>:</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p><span class="icon green"><i class="fa fa-check fa-pro"></i></span> <code>HASH(a)</code>, <code>HASH(b)</code></p>
 </li>
 <li>
 <p><span class="icon green"><i class="fa fa-check fa-pro"></i></span> <code>HASH(a,b)</code></p>
 </li>
 <li>
 <p><span class="icon red"><i class="fa fa-times fa-pro"></i></span> <code>HASH(a), HASH(a,b)</code></p>
 </li>
 </ul>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 <code>DISTRIBUTE BY HASH</code> with no column specified is a shortcut to create the desired
 number of buckets by hashing all primary key columns.
 </td>
 </tr>
 </table>
 </div>
 <div class="paragraph">
 <p>Hash partitioning is a reasonable approach if primary key values are evenly
 distributed in their domain and no data skew is apparent, such as timestamps or
 serial IDs.</p>
 </div>
 <div class="paragraph">
 <p>The following example creates 16 tablets by hashing the <code>id</code> and <code>sku</code> columns. This spreads
 writes across all 16 tablets. In this example, a query for a range of <code>sku</code> values
 is likely to need to read all 16 tablets, so this may not be the optimum schema for
 this table. See <a href="#advanced_partitioning">Advanced Partitioning</a> for an extended example.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE cust_behavior (
   id BIGINT,
   sku STRING,
   salary STRING,
   edu_level INT,
   usergender STRING,
   `group` STRING,
   city STRING,
   postcode STRING,
   last_purchase_price FLOAT,
   last_purchase_date BIGINT,
   category STRING,
   rating INT,
   fulfilled_date BIGINT
 )
 DISTRIBUTE BY HASH INTO 16 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'id, sku'
 );</code></pre>
 </div>
 </div>
 </div>
 <div class="sect3">
 <h4 id="advanced_partitioning"><a class="link" href="#advanced_partitioning">Advanced Partitioning</a></h4>
 <div class="paragraph">
 <p>You can combine <code>HASH</code> and <code>RANGE</code> partitioning to create more complex partition schemas.
 You can specify zero or more <code>HASH</code> definitions, followed by zero or one <code>RANGE</code> definitions.
 Each definition can encompass one or more columns. While enumerating every possible distribution
 schema is out of the scope of this document, a few examples illustrate some of the
 possibilities.</p>
 </div>
 <div class="paragraph">
 <div class="title"><code>DISTRIBUTE BY RANGE</code> Using Compound Split Rows</div>
 <p>This example creates 100 tablets, two for each US state. Per state, the first tablet
 holds names starting with characters before 'm', and the second tablet holds names
 starting with 'm'-'z'. Writes are spread across at least 50 tablets, and possibly
 up to 100. A query for a range of names in a given state is likely to only need to read from
 one tablet, while a query for a range of names across every state will likely
 read from at most 50 tablets.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE customers (
   state STRING,
   name STRING,
   purchase_count int32,
 )
 DISTRIBUTE BY RANGE (state, name)
   SPLIT ROWS (('al', ''),
               ('al', 'm'),
               ('ak', ''),
               ('ak', 'm'),
               ...
               ('wy', ''),
               ('wy', 'm'))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'customers',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'state, name'
 );</code></pre>
 </div>
 </div>
 </div>
 <div class="sect3">
 <h4 id="__code_distribute_by_hash_code_and_code_range_code"><a class="link" href="#__code_distribute_by_hash_code_and_code_range_code"><code>DISTRIBUTE BY HASH</code> and <code>RANGE</code></a></h4>
 <div class="paragraph">
 <p>Consider the <a href="#distribute_by_hash">simple hashing</a> example above. If you often query for a range of <code>sku</code>
 values, you can optimize the example by combining hash partitioning with range partitioning.</p>
 </div>
 <div class="paragraph">
 <p>The following example still creates 16 tablets, by first hashing the <code>id</code> column into 4
 buckets, and then applying range partitioning to split each bucket into four tablets,
 based upon the value of the <code>sku</code> string. Writes are spread across at least four tablets
 (and possibly up to 16). When you query for a contiguous range of <code>sku</code> values, you have a
 good chance of only needing to read from a quarter of the tablets to fulfill the query.</p>
 </div>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 By default, the entire primary key is hashed when you use <code>DISTRIBUTE BY HASH</code>.
 To hash on only part of the primary key, specify it by using syntax like <code>DISTRIBUTE
 BY HASH (id, sku)</code>.
 </td>
 </tr>
 </table>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE cust_behavior (
   id BIGINT,
   sku STRING,
   salary STRING,
   edu_level INT,
   usergender STRING,
   `group` STRING,
   city STRING,
   postcode STRING,
   last_purchase_price FLOAT,
   last_purchase_date BIGINT,
   category STRING,
   rating INT,
   fulfilled_date BIGINT
 )
 DISTRIBUTE BY HASH (id) INTO 4 BUCKETS,
 RANGE (sku)
   SPLIT ROWS (('g'),
               ('o'),
               ('u'))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'id, sku'
 );</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <div class="title">Multiple <code>DISTRIBUTE BY HASH</code> Definitions</div>
 <p>Again expanding the example above, suppose that the query pattern will be unpredictable,
 but you want to ensure that writes are spread across a large number of tablets.
 You can achieve maximum distribution across the entire primary key by hashing on
 both primary key columns.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">CREATE TABLE cust_behavior (
   id BIGINT,
   sku STRING,
   salary STRING,
   edu_level INT,
   usergender STRING,
   `group` STRING,
   city STRING,
   postcode STRING,
   last_purchase_price FLOAT,
   last_purchase_date BIGINT,
   category STRING,
   rating INT,
   fulfilled_date BIGINT
 )
 DISTRIBUTE BY HASH (id) INTO 4 BUCKETS,
               HASH (sku) INTO 4 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'id, sku'
 );</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>The example creates 16 buckets. You could also use <code>HASH (id, sku) INTO 16 BUCKETS</code>.
 However, a scan for <code>sku</code> values would almost always impact all 16 buckets, rather
 than possibly being limited to 4.</p>
 </div>
 </div>
 <div class="sect3">
 <h4 id="partitioning_rules_of_thumb"><a class="link" href="#partitioning_rules_of_thumb">Partitioning Rules of Thumb</a></h4>
 <div class="ulist">
 <ul>
 <li>
 <p>For large tables, such as fact tables, aim for as many tablets as you have
 cores in the cluster.</p>
 </li>
 <li>
 <p>For small tables, such as dimension tables, aim for a large enough number of tablets
 that each tablet is at least 1 GB in size.</p>
 </li>
 </ul>
 </div>
 <div class="paragraph">
 <p>In general, be mindful that the number of tablets limits the parallelism of reads,
 in the current implementation. Increasing the number of tablets significantly
 beyond the number of cores is likely to have diminishing returns.</p>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_inserting_data_into_kudu_tables"><a class="link" href="#_inserting_data_into_kudu_tables">Inserting Data Into Kudu Tables</a></h3>
 <div class="paragraph">
 <p>Impala allows you to use standard SQL syntax to insert data into Kudu.</p>
 </div>
 <div class="sect3">
 <h4 id="_inserting_single_values"><a class="link" href="#_inserting_single_values">Inserting Single Values</a></h4>
 <div class="paragraph">
 <p>This example inserts a single row.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">INSERT INTO my_first_table VALUES (99, "sarah");</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>This example inserts three rows using a single statement.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">INSERT INTO my_first_table VALUES (1, "john"), (2, "jane"), (3, "jim");</code></pre>
 </div>
 </div>
 </div>
 <div class="sect3">
 <h4 id="kudu_impala_insert_bulk"><a class="link" href="#kudu_impala_insert_bulk">Inserting In Bulk</a></h4>
 <div class="paragraph">
 <p>When inserting in bulk, there are at least three common choices. Each may have advantages
 and disadvantages, depending on your data and circumstances.</p>
 </div>
 <div class="dlist">
 <dl>
 <dt class="hdlist1">Multiple single <code>INSERT</code> statements</dt>
 <dd>
 <p>This approach has the advantage of being easy to
 understand and implement. This approach is likely to be inefficient because Impala
 has a high query start-up cost compared to Kudu&#8217;s insertion performance. This will
 lead to relatively high latency and poor throughput.</p>
 </dd>
 <dt class="hdlist1">Single <code>INSERT</code> statement with multiple <code>VALUES</code></dt>
 <dd>
 <p>If you include more
 than 1024 <code>VALUES</code> statements, Impala batches them into groups of 1024 (or the value
 of <code>batch_size</code>) before sending the requests to Kudu. This approach may perform
 slightly better than multiple sequential <code>INSERT</code> statements by amortizing the query start-up
 penalties on the Impala side. To set the batch size for the current Impala
 Shell session, use the following syntax: <code>set batch_size=10000;</code></p>
 <div class="admonitionblock note">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-note" title="Note"></i>
 </td>
 <td class="content">
 Increasing the Impala batch size causes Impala to use more memory. You should
 verify the impact on your cluster and tune accordingly.
 </td>
 </tr>
 </table>
 </div>
 </dd>
 <dt class="hdlist1">Batch Insert</dt>
 <dd>
 <p>The approach that usually performs best, from the standpoint of
 both Impala and Kudu, is to import the data using a <code>SELECT FROM</code> statement
 in Impala.</p>
 <div class="olist arabic">
 <ol class="arabic">
 <li>
 <p>If your data is not already in Impala, one strategy is to
 <a href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/impala_txtfile.html">import it from a text file</a>,
 such as a TSV or CSV file.</p>
 </li>
 <li>
 <p><a href="#kudu_impala_create_table">Create the Kudu table</a>, being mindful that the columns
 designated as primary keys cannot have null values.</p>
 </li>
 <li>
 <p>Insert values into the Kudu table by querying the table containing the original
 data, as in the following example:</p>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">INSERT INTO my_kudu_table
   SELECT * FROM legacy_data_import_table;</code></pre>
 </div>
 </div>
 </li>
 </ol>
 </div>
 </dd>
 <dt class="hdlist1">Ingest using the C++ or Java API</dt>
 <dd>
 <p>In many cases, the appropriate ingest path is to
 use the C++ or Java API to insert directly into Kudu tables. Unlike other Impala tables,
 data inserted into Kudu tables via the API becomes available for query in Impala without
 the need for any <code>INVALIDATE METADATA</code> statements or other statements needed for other
 Impala storage types.</p>
 </dd>
 </dl>
 </div>
 </div>
 <div class="sect3">
 <h4 id="insert_ignore"><a class="link" href="#insert_ignore"><code>INSERT</code> and the <code>IGNORE</code> Keyword</a></h4>
 <div class="paragraph">
 <p>Normally, if you try to insert a row that has already been inserted, the insertion
 will fail because the primary key would be duplicated. See <a href="#impala_insertion_caveat">Failures During <code>INSERT</code>, <code>UPDATE</code>, and <code>DELETE</code> Operations</a>.
 If an insert fails part of the way through, you can re-run the insert, using the
 <code>IGNORE</code> keyword, which will ignore only those errors returned from Kudu indicating
 a duplicate key.</p>
 </div>
 <div class="paragraph">
 <p>The first example will cause an error if a row with the primary key <code>99</code> already exists.
 The second example will still not insert the row, but will ignore any error and continue
 on to the next SQL statement.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">INSERT INTO my_first_table VALUES (99, "sarah");
 INSERT IGNORE INTO my_first_table VALUES (99, "sarah");</code></pre>
 </div>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_updating_a_row"><a class="link" href="#_updating_a_row">Updating a Row</a></h3>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">UPDATE my_first_table SET name="bob" where id = 3;</code></pre>
 </div>
 </div>
 <div class="admonitionblock important">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-important" title="Important"></i>
 </td>
 <td class="content">
 The <code>UPDATE</code> statement only works in Impala when the target table is in
 Kudu.
 </td>
 </tr>
 </table>
 </div>
 <div class="sect3">
 <h4 id="_updating_in_bulk"><a class="link" href="#_updating_in_bulk">Updating In Bulk</a></h4>
 <div class="paragraph">
 <p>You can update in bulk using the same approaches outlined in
 <a href="#kudu_impala_insert_bulk">Inserting In Bulk</a>.</p>
 </div>
 </div>
 <div class="sect3">
 <h4 id="__code_update_code_and_the_code_ignore_code_keyword"><a class="link" href="#__code_update_code_and_the_code_ignore_code_keyword"><code>UPDATE</code> and the <code>IGNORE</code> Keyword</a></h4>
 <div class="paragraph">
 <p>Similarly to <a href="#insert_ignore"><code>INSERT</code> and the <code>IGNORE</code> Keyword</a>, you can use the <code>IGNORE</code> operation to ignore an <code>UPDATE</code>
 which would otherwise fail. For instance, a row may be deleted while you are
 attempting to update it. In Impala, this would cause an error. The <code>IGNORE</code>
 keyword causes the error to be ignored.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">UPDATE IGNORE my_first_table SET name="bob" where id = 3;</code></pre>
 </div>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_deleting_a_row"><a class="link" href="#_deleting_a_row">Deleting a Row</a></h3>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">DELETE FROM my_first_table WHERE id &lt; 3;</code></pre>
 </div>
 </div>
 <div class="paragraph">
 <p>You can also delete using more complex syntax. A comma in the <code>FROM</code> sub-clause is
 one way that Impala specifies a join query. For more information about Impala joins,
 see <a href="http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/impala_joins.html" class="bare">http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/impala_joins.html</a>.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">DELETE c FROM my_second_table c, stock_symbols s WHERE c.name = s.symbol;</code></pre>
 </div>
 </div>
 <div class="admonitionblock important">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-important" title="Important"></i>
 </td>
 <td class="content">
 The <code>DELETE</code> statement only works in Impala when the target table is in
 Kudu.
 </td>
 </tr>
 </table>
 </div>
 <div class="sect3">
 <h4 id="_deleting_in_bulk"><a class="link" href="#_deleting_in_bulk">Deleting In Bulk</a></h4>
 <div class="paragraph">
 <p>You can delete in bulk using the same approaches outlined in
 <a href="#kudu_impala_insert_bulk">Inserting In Bulk</a>.</p>
 </div>
 </div>
 <div class="sect3">
 <h4 id="__code_delete_code_and_the_code_ignore_code_keyword"><a class="link" href="#__code_delete_code_and_the_code_ignore_code_keyword"><code>DELETE</code> and the <code>IGNORE</code> Keyword</a></h4>
 <div class="paragraph">
 <p>Similarly to <a href="#insert_ignore"><code>INSERT</code> and the <code>IGNORE</code> Keyword</a>, you can use the <code>IGNORE</code> operation to ignore a <code>DELETE</code>
 which would otherwise fail. For instance, a row may be deleted by another process
 while you are attempting to delete it. In Impala, this would cause an error. The
 <code>IGNORE</code> keyword causes the error to be ignored.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">DELETE IGNORE FROM my_first_table WHERE id &lt; 3;</code></pre>
 </div>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="impala_insertion_caveat"><a class="link" href="#impala_insertion_caveat">Failures During <code>INSERT</code>, <code>UPDATE</code>, and <code>DELETE</code> Operations</a></h3>
 <div class="paragraph">
 <p><code>INSERT</code>, <code>UPDATE</code>, and <code>DELETE</code> statements cannot be considered transactional as
 a whole. If one of these operations fails part of the way through, the keys may
 have already been created (in the case of <code>INSERT</code>) or the records may have already
 been modified or removed by another process (in the case of <code>UPDATE</code> or <code>DELETE</code>).
 You should design your application with this in mind. See <a href="#insert_ignore"><code>INSERT</code> and the <code>IGNORE</code> Keyword</a>.</p>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_altering_table_properties"><a class="link" href="#_altering_table_properties">Altering Table Properties</a></h3>
 <div class="paragraph">
 <p>You can change Impala&#8217;s metadata relating to a given Kudu table by altering the table&#8217;s
 properties. These properties include the table name, the list of Kudu master addresses,
 and whether the table is managed by Impala (internal) or externally. You cannot modify
 a table&#8217;s split rows after table creation.</p>
 </div>
 <div class="admonitionblock important">
 <table>
 <tr>
 <td class="icon">
 <i class="fa icon-important" title="Important"></i>
 </td>
 <td class="content">
 Altering table properties only changes Impala&#8217;s metadata about the table,
 not the underlying table itself. These statements do not modify any table metadata
 in Kudu.
 </td>
 </tr>
 </table>
 </div>
 <div class="listingblock">
 <div class="title">Rename a Table</div>
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">ALTER TABLE my_table RENAME TO my_new_table;</code></pre>
 </div>
 </div>
 <div class="listingblock">
 <div class="title">Change the Kudu Master Address</div>
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">ALTER TABLE my_table
 SET TBLPROPERTIES('kudu.master_addresses' = 'kudu-new-master.example.com:7051');</code></pre>
 </div>
 </div>
 <div class="listingblock">
 <div class="title">Change an Internally-Managed Table to External</div>
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">ALTER TABLE my_table SET TBLPROPERTIES('EXTERNAL' = 'TRUE');</code></pre>
 </div>
 </div>
 </div>
 <div class="sect2">
 <h3 id="_dropping_a_kudu_table_using_impala"><a class="link" href="#_dropping_a_kudu_table_using_impala">Dropping a Kudu Table Using Impala</a></h3>
 <div class="paragraph">
 <p>If the table was created as an internal table in Impala, using <code>CREATE TABLE</code>, the
 standard <code>DROP TABLE</code> syntax drops the underlying Kudu table and all its data. If
 the table was created as an external table, using <code>CREATE EXTERNAL TABLE</code>, the mapping
 between Impala and Kudu is dropped, but the Kudu table is left intact, with all its
 data.</p>
 </div>
 <div class="listingblock">
 <div class="content">
 <pre class="highlight"><code class="language-sql" data-lang="sql">DROP TABLE my_first_table;</code></pre>
 </div>
 </div>
 </div>
 </div>
 </div>
 <div class="sect1">
 <h2 id="_what_s_next"><a class="link" href="#_what_s_next">What&#8217;s Next?</a></h2>
 <div class="sectionbody">
 <div class="paragraph">
 <p>The examples above have only explored a fraction of what you can do with Impala Shell.</p>
 </div>
 <div class="ulist">
 <ul>
 <li>
 <p>Learn about the <a href="http://impala.io">Impala project</a>.</p>
 </li>
 <li>
 <p>Read the <a href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/impala.html">Impala documentation</a>.</p>
 </li>
 <li>
 <p>View the <a href="http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/impala_langref.html">Impala SQL reference</a>.</p>
 </li>
 <li>
 <p>Read about Impala internals or learn how to contribute to Impala on the <a href="https://github.com/cloudera/Impala/wiki">Impala Wiki</a>.</p>
 </li>
 <li>
 <p>Read about the native <a href="installation.html#view_api">Kudu APIs</a>.</p>
 </li>
 </ul>
 </div>
 <div class="sect2">
 <h3 id="_known_issues_and_limitations"><a class="link" href="#_known_issues_and_limitations">Known Issues and Limitations</a></h3>
 <div class="ulist">
 <ul>
 <li>
 <p>Kudu tables with a name containing upper case or non-ASCII characters must be
 assigned an alternate name when used as an external table in Impala.</p>
 </li>
 <li>
 <p>Kudu tables with a column name containing upper case or non-ASCII characters
 may not be used as an external table in Impala. Non-primary key columns may be
 renamed in Kudu to work around this issue.</p>
 </li>
 <li>
 <p>When creating a Kudu table, the <code>CREATE TABLE</code> statement must include the
 primary key columns before other columns, in primary key order.</p>
 </li>
 <li>
 <p>Kudu tables containing <code>UNIXTIME_MICROS</code>-typed columns may not be used as an
 external table in Impala.</p>
 </li>
 <li>
 <p>Impala cannot create Kudu tables with <code>TIMESTAMP</code> or nested-typed columns.</p>
 </li>
 <li>
 <p>Impala cannot update values in primary key columns.</p>
 </li>
 <li>
 <p><code>NULL</code>, <code>NOT NULL</code>, <code>!=</code>, and <code>IN</code> predicates are not pushed to Kudu, and
 instead will be evaluated by the Impala scan node.</p>
 </li>
 <li>
 <p>Impala cannot specify column encoding or compression during Kudu table
 creation, or alter a column&#8217;s encoding or compression.</p>
 </li>
 <li>
 <p>Impala cannot create Kudu tables with bounded range partitions, and cannot
 alter a table to add or remove range partitions.</p>
 </li>
 <li>
 <p>When bulk writing to a Kudu table, performance may be improved by setting the
 <code>batch_size</code> option (see <a href="#kudu_impala_insert_bulk">Inserting In Bulk</a>).</p>
 </li>
 </ul>
 </div>
 </div>
 </div>
 </div>
     </div>
     <div class="col-md-3">

   <div id="toc" data-spy="affix" data-offset-top="70">
   <ul>

       <li>

           <a href="index.html">Introducing Kudu</a>
       </li>
       <li>

           <a href="release_notes.html">Kudu Release Notes</a>
       </li>
       <li>

           <a href="quickstart.html">Getting Started with Kudu</a>
       </li>
       <li>

           <a href="installation.html">Installation Guide</a>
       </li>
       <li>

           <a href="configuration.html">Configuring Kudu</a>
       </li>
       <li>
 <span class="active-toc">Using Impala with Kudu</span>
             <ul class="sectlevel1">
 <li><a href="#_requirements_and_implications">Requirements and Implications</a></li>
 <li><a href="#_installing_impala_kudu_using_cloudera_manager">Installing Impala_Kudu Using Cloudera Manager</a>
 <ul class="sectlevel2">
 <li><a href="#install_impala_kudu_parcels">Installing the Impala_Kudu Service Using Parcels</a></li>
 <li><a href="#install_impala_kudu_packages">Installing Impala_Kudu Using Packages</a></li>
 <li><a href="#_adding_impala_service_in_cloudera_manager">Adding Impala service in Cloudera Manager</a></li>
 </ul>
 </li>
 <li><a href="#_installing_impala_kudu_without_cloudera_manager">Installing Impala_Kudu Without Cloudera Manager</a>
 <ul class="sectlevel2">
 <li><a href="#_starting_impala_kudu_services">Starting Impala_Kudu Services</a></li>
 </ul>
 </li>
 <li><a href="#_using_the_impala_shell">Using the Impala Shell</a>
 <ul class="sectlevel2">
 <li><a href="#_internal_and_external_impala_tables">Internal and External Impala Tables</a></li>
 <li><a href="#_querying_an_existing_kudu_table_in_impala">Querying an Existing Kudu Table In Impala</a></li>
 <li><a href="#kudu_impala_create_table">Creating a New Kudu Table From Impala</a></li>
 <li><a href="#_optimizing_performance_for_evaluating_sql_predicates">Optimizing Performance for Evaluating SQL Predicates</a></li>
 <li><a href="#partitioning_tables">Partitioning Tables</a></li>
 <li><a href="#_inserting_data_into_kudu_tables">Inserting Data Into Kudu Tables</a></li>
 <li><a href="#_updating_a_row">Updating a Row</a></li>
 <li><a href="#_deleting_a_row">Deleting a Row</a></li>
 <li><a href="#impala_insertion_caveat">Failures During <code>INSERT</code>, <code>UPDATE</code>, and <code>DELETE</code> Operations</a></li>
 <li><a href="#_altering_table_properties">Altering Table Properties</a></li>
 <li><a href="#_dropping_a_kudu_table_using_impala">Dropping a Kudu Table Using Impala</a></li>
 </ul>
 </li>
 <li><a href="#_what_s_next">What&#8217;s Next?</a>
 <ul class="sectlevel2">
 <li><a href="#_known_issues_and_limitations">Known Issues and Limitations</a></li>
 </ul>
 </li>
 </ul>
       </li>
       <li>

           <a href="administration.html">Administering Kudu</a>
       </li>
       <li>

           <a href="troubleshooting.html">Troubleshooting Kudu</a>
       </li>
       <li>

           <a href="developing.html">Developing Applications with Kudu</a>
       </li>
       <li>

           <a href="schema_design.html">Kudu Schema Design</a>
       </li>
       <li>

           <a href="transaction_semantics.html">Kudu Transaction Semantics</a>
       </li>
       <li>

           <a href="contributing.html">Contributing to Kudu</a>
       </li>
       <li>

           <a href="style_guide.html">Kudu Documentation Style Guide</a>
       </li>
       <li>

           <a href="configuration_reference.html">Kudu Configuration Reference</a>
       </li>
       <li>

           <a href="export_control.html">Export Control Notice</a>
       </li>
   </ul>
   </div>
     </div>
   </div>
 </div>
       <footer class="footer">
         <div class="row">
           <div class="col-md-9">
             <p class="small">
             Copyright &copy; 2019 The Apache Software Foundation.  Last updated 2016-11-14 15:52:59 PST
             </p>
             <p class="small">
             Apache Kudu, Kudu, Apache, the Apache feather logo, and the Apache Kudu
             project logo are either registered trademarks or trademarks of The
             Apache Software Foundation in the United States and other countries.
             </p>
           </div>
           <div class="col-md-3">
             <a class="pull-right" href="https://www.apache.org/events/current-event.html">
                 <img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache event"/>
             </a>
           </div>
         </div>
       </footer>
     </div>
     <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
     <script>
       // Once the DOM is ready, tag the root element with "touch" when it exposes
       // an "ontouchstart" property and with "no-touch" otherwise.
       // Note: many laptops have touch screens, so this is only a heuristic.
       $(document).ready(function() {
         var root = document.documentElement;
         var touchClass = ("ontouchstart" in root) ? "touch" : "no-touch";
         $(root).addClass(touchClass);
       });
     </script>
     <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js"
             integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS"
             crossorigin="anonymous"></script>
     <script>
       // Google Analytics (analytics.js) loader. It stores the name 'ga' on
       // window.GoogleAnalyticsObject, installs a stub window.ga that queues its
       // arguments until the library arrives, records the current time, and
       // inserts an async <script> for analytics.js before the first script
       // element in the document.
       // The library URL is pinned to https:// (instead of protocol-relative //)
       // so it is never fetched over plain HTTP and still resolves when the page
       // is opened from a non-http(s) origin.
       (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
       (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
       m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
       })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

       // Queue creation of the tracker for this property, then a page-view hit.
       ga('create', 'UA-68448017-1', 'auto');
       ga('send', 'pageview');
     </script>
     <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/3.1.0/anchor.js"></script>
     <script>
       // Replace the AnchorJS options object (placement 'right', visibility
       // mode 'touch'), then call add() with no arguments.
       anchors.options = { placement: 'right', visible: 'touch' };
       anchors.add();
     </script>
   </body>
 </html>