<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>airflow.hooks.hive_hooks &mdash; Airflow Documentation</title>
<script type="text/javascript" src="../../../../_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../../_static/doctools.js"></script>
<script type="text/javascript" src="../../../../_static/language_data.js"></script>
<script type="text/javascript" src="../../../../_static/js/theme.js"></script>
<link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<link rel="next" title="airflow.hooks.http_hook" href="../http_hook/index.html" />
<link rel="prev" title="airflow.hooks.hdfs_hook" href="../hdfs_hook/index.html" />
<script>
// Once the DOM is ready, turn issue and pull-request references inside the
// #changelog element into links. Pages without that element are left untouched.
document.addEventListener('DOMContentLoaded', () => {
  const changelog = document.getElementById('changelog');
  if (changelog === null) {
    return;
  }

  // [pattern, replacement] pairs, applied in order. Each rule reads the
  // current markup and writes the result back before the next rule runs.
  const linkRules = [
    // [AIRFLOW-...] -> Apache JIRA issue
    [
      /\[(AIRFLOW-[\d]+)\]/g,
      `<a href="https://issues.apache.org/jira/browse/$1">[$1]</a>`,
    ],
    // (#...) -> GitHub pull request
    [
      /\(#([\d]+)\)/g,
      `<a href="https://github.com/apache/airflow/pull/$1">(#$1)</a>`,
    ],
  ];

  for (const [pattern, replacement] of linkRules) {
    changelog.innerHTML = changelog.innerHTML.replace(pattern, replacement);
  }
});
</script>
<style>
/* Header bar placed directly above an example code listing (see the
   `.example-header + .highlight-python` rule below). */
.example-header {
  position: relative;
  background: #9AAA7A;
  padding: 8px 16px;
  margin-bottom: 0;
}

/* Extra right padding, presumably to keep the title clear of the absolutely
   positioned .example-header-button. */
.example-header--with-button {
  padding-right: 166px;
}

/* Clearfix so the header wraps any floated children. */
.example-header:after {
  content: '';
  display: table;
  clear: both;
}

/* Title text; scrolls horizontally instead of overflowing the header. */
.example-title {
  display: block;
  padding: 4px;
  margin-right: 16px;
  color: white;
  overflow-x: auto;
}

/* Button pinned to the top-right corner of the (relative) header. */
.example-header-button {
  top: 8px;
  right: 16px;
  position: absolute;
}

/* Remove the gap between the header and the code block that follows it. */
.example-header + .highlight-python {
  margin-top: 0 !important;
}

/* Link styled as a raised, uppercase button. */
.viewcode-button {
  display: inline-block;
  padding: 8px 16px;
  border: 0;
  margin: 0;
  outline: 0;
  border-radius: 2px;
  /* Prefixed and standard shadows use the same 6px blur so the button looks
     the same regardless of which declaration the browser applies. */
  -webkit-box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
  box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
  color: #404040;
  background-color: #e7e7e7;
  cursor: pointer;
  font-size: 16px;
  font-weight: 500;
  line-height: 1;
  text-decoration: none;
  text-overflow: ellipsis;
  overflow: hidden;
  text-transform: uppercase;
  -webkit-transition: background-color .2s;
  transition: background-color .2s;
  vertical-align: middle;
  white-space: nowrap;
}

/* Keep the button colour for visited links. */
.viewcode-button:visited {
  color: #404040;
}

.viewcode-button:hover, .viewcode-button:focus {
  color: #404040;
  background-color: #d6d6d6;
}

/* The base rule removes the default outline, so restore a clearly visible
   focus ring for keyboard users; pointer clicks are unaffected. */
.viewcode-button:focus-visible {
  outline: 2px solid #404040;
  outline-offset: 2px;
}
</style>
<script type="application/javascript">
// Google Analytics bootstrap. If `window.ga` is not defined yet, install a stub
// that only appends each call's arguments to the `ga.q` queue; `ga.l` stores
// the time (in ms) at which this snippet ran. The analytics.js library itself
// is requested by the separate async <script> tag that follows this one.
window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
// Issue the "create" command for property UA-140539454-1 (third argument "auto";
// see the analytics.js documentation for its meaning).
ga("create", "UA-140539454-1", "auto");
// Issue the "send" command with hit type "pageview".
ga("send", "pageview");
</script>
<script async src="https://www.google-analytics.com/analytics.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../../../index.html" class="icon icon-home"> Airflow
</a>
<div class="version">
1.10.4
</div>
<!-- Documentation search: submits the query "q" to the site-wide search page via GET. -->
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
<!-- A placeholder is not an accessible name, so the field is labelled explicitly for assistive technology. -->
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../../../project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorial.html">Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../howto/index.html">How-to Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../cli.html">Command Line Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api.html">Experimental Rest API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../integration.html">Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../macros.html">Macros reference</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../../../index.html">API Reference</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../../index.html#operators">Operators</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="../../../index.html#hooks">Hooks</a><ul class="current">
<li class="toctree-l3 current"><a class="reference internal" href="../../../index.html#hooks-packages">Hooks packages</a><ul class="current">
<li class="toctree-l4 current"><a class="reference internal" href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../contrib/hooks/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.hooks</span></code></a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../index.html#executors">Executors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../index.html#models">Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../index.html#core-and-community-package">Core and community package</a></li>
</ul>
</li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../../index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../../index.html">Docs</a> &raquo;</li>
<li><a href="../../../index.html">API Reference</a> &raquo;</li>
<li><a href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks</span></code></a> &raquo;</li>
<li><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks.hive_hooks</span></code></li>
<li class="wy-breadcrumbs-aside">
<a href="../../../../_sources/_api/airflow/hooks/hive_hooks/index.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-airflow.hooks.hive_hooks">
<span id="airflow-hooks-hive-hooks"></span><h1><a class="reference internal" href="#module-airflow.hooks.hive_hooks" title="airflow.hooks.hive_hooks"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks.hive_hooks</span></code></a><a class="headerlink" href="#module-airflow.hooks.hive_hooks" title="Permalink to this headline"></a></h1>
<div class="section" id="module-contents">
<h2>Module Contents<a class="headerlink" href="#module-contents" title="Permalink to this headline"></a></h2>
<dl class="data">
<dt id="airflow.hooks.hive_hooks.HIVE_QUEUE_PRIORITIES">
<code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HIVE_QUEUE_PRIORITIES</code><em class="property"> = ['VERY_HIGH', 'HIGH', 'NORMAL', 'LOW', 'VERY_LOW']</em><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HIVE_QUEUE_PRIORITIES"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HIVE_QUEUE_PRIORITIES" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="function">
<dt id="airflow.hooks.hive_hooks.get_context_from_env_var">
<code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">get_context_from_env_var</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#get_context_from_env_var"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.get_context_from_env_var" title="Permalink to this definition"></a></dt>
<dd><p>Extract context from env variable, e.g. dag_id, task_id and execution_date,
so that they can be used inside BashOperator and PythonOperator.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>The context of interest.</p>
</dd>
</dl>
</dd></dl>
<dl class="class">
<dt id="airflow.hooks.hive_hooks.HiveCliHook">
<em class="property">class </em><code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HiveCliHook</code><span class="sig-paren">(</span><em class="sig-param">hive_cli_conn_id='hive_cli_default'</em>, <em class="sig-param">run_as=None</em>, <em class="sig-param">mapred_queue=None</em>, <em class="sig-param">mapred_queue_priority=None</em>, <em class="sig-param">mapred_job_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="../base_hook/index.html#airflow.hooks.base_hook.BaseHook" title="airflow.hooks.base_hook.BaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></a></p>
<p>Simple wrapper around the hive CLI.</p>
<p>It also supports <code class="docutils literal notranslate"><span class="pre">beeline</span></code>,
a lighter CLI that runs JDBC and is replacing the heavier
traditional CLI. To enable <code class="docutils literal notranslate"><span class="pre">beeline</span></code>, set the use_beeline param in the
extra field of your connection as in <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">&quot;use_beeline&quot;:</span> <span class="pre">true</span> <span class="pre">}</span></code>.</p>
<p>Note that you can also set default hive CLI parameters using the
<code class="docutils literal notranslate"><span class="pre">hive_cli_params</span></code> to be used in your connection as in
<code class="docutils literal notranslate"><span class="pre">{&quot;hive_cli_params&quot;:</span> <span class="pre">&quot;-hiveconf</span> <span class="pre">mapred.job.tracker=some.jobtracker:444&quot;}</span></code>
Parameters passed here can be overridden by run_cli’s hive_conf param</p>
<p>The extra connection parameter <code class="docutils literal notranslate"><span class="pre">auth</span></code> gets passed in the <code class="docutils literal notranslate"><span class="pre">jdbc</span></code>
connection string as is.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>mapred_queue</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – queue used by the Hadoop Scheduler (Capacity or Fair)</p></li>
<li><p><strong>mapred_queue_priority</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – priority within the job queue.
Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW</p></li>
<li><p><strong>mapred_job_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – This name will appear in the jobtracker.
This can make monitoring easier.</p></li>
</ul>
</dd>
</dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook._get_proxy_user">
<code class="sig-name descname">_get_proxy_user</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook._get_proxy_user"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook._get_proxy_user" title="Permalink to this definition"></a></dt>
<dd><p>This function sets the proper proxy_user value in case the user overwrites the default.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook._prepare_cli_cmd">
<code class="sig-name descname">_prepare_cli_cmd</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook._prepare_cli_cmd"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook._prepare_cli_cmd" title="Permalink to this definition"></a></dt>
<dd><p>This function creates the command list from available information</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook._prepare_hiveconf">
<em class="property">static </em><code class="sig-name descname">_prepare_hiveconf</code><span class="sig-paren">(</span><em class="sig-param">d</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook._prepare_hiveconf"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook._prepare_hiveconf" title="Permalink to this definition"></a></dt>
<dd><p>This function prepares a list of hiveconf params
from a dictionary of key value pairs.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>d</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – dictionary of key value pairs to turn into hiveconf params</p>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveCliHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hive_conf</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;hive.exec.dynamic.partition&quot;</span><span class="p">:</span> <span class="s2">&quot;true&quot;</span><span class="p">,</span>
<span class="gp">... </span><span class="s2">&quot;hive.exec.dynamic.partition.mode&quot;</span><span class="p">:</span> <span class="s2">&quot;nonstrict&quot;</span><span class="p">}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">_prepare_hiveconf</span><span class="p">(</span><span class="n">hive_conf</span><span class="p">)</span>
<span class="go">[&quot;-hiveconf&quot;, &quot;hive.exec.dynamic.partition=true&quot;, &quot;-hiveconf&quot;, &quot;hive.exec.dynamic.partition.mode=nonstrict&quot;]</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook.run_cli">
<code class="sig-name descname">run_cli</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema=None</em>, <em class="sig-param">verbose=True</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.run_cli"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.run_cli" title="Permalink to this definition"></a></dt>
<dd><p>Run an hql statement using the hive cli. If hive_conf is specified
it should be a dict and the entries will be set as key/value pairs
in HiveConf</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – if specified these key value pairs will be passed
to hive as <code class="docutils literal notranslate"><span class="pre">-hiveconf</span> <span class="pre">&quot;key&quot;=&quot;value&quot;</span></code>. Note that they will be
passed after the <code class="docutils literal notranslate"><span class="pre">hive_cli_params</span></code> and thus will override
whatever values are specified in the database.</p>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveCliHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">run_cli</span><span class="p">(</span><span class="s2">&quot;USE airflow;&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="s2">&quot;OK&quot;</span> <span class="ow">in</span> <span class="n">result</span><span class="p">)</span>
<span class="go">True</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook.test_hql">
<code class="sig-name descname">test_hql</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.test_hql"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.test_hql" title="Permalink to this definition"></a></dt>
<dd><p>Test an hql statement using the hive cli and EXPLAIN</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook.load_df">
<code class="sig-name descname">load_df</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">df</em>, <em class="sig-param">table</em>, <em class="sig-param">field_dict=None</em>, <em class="sig-param">delimiter='</em>, <em class="sig-param">'</em>, <em class="sig-param">encoding='utf8'</em>, <em class="sig-param">pandas_kwargs=None</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.load_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.load_df" title="Permalink to this definition"></a></dt>
<dd><p>Loads a pandas DataFrame into hive.</p>
<p>Hive data types will be inferred if not passed but column names will
not be sanitized.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>df</strong> (<a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html#pandas.DataFrame" title="(in pandas v0.25.0)"><em>pandas.DataFrame</em></a>) – DataFrame to load into a Hive table</p></li>
<li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – target Hive table, use dot notation to target a
specific database</p></li>
<li><p><strong>field_dict</strong> (<a class="reference external" href="https://docs.python.org/3/library/collections.html#collections.OrderedDict" title="(in Python v3.7)"><em>collections.OrderedDict</em></a>) – mapping from column name to hive data type.
Note that it must be OrderedDict so as to keep columns’ order.</p></li>
<li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – field delimiter in the file</p></li>
<li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – str encoding to use when writing DataFrame to file</p></li>
<li><p><strong>pandas_kwargs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – passed to DataFrame.to_csv</p></li>
<li><p><strong>kwargs</strong> – passed to self.load_file</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook.load_file">
<code class="sig-name descname">load_file</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">filepath</em>, <em class="sig-param">table</em>, <em class="sig-param">delimiter='</em>, <em class="sig-param">'</em>, <em class="sig-param">field_dict=None</em>, <em class="sig-param">create=True</em>, <em class="sig-param">overwrite=True</em>, <em class="sig-param">partition=None</em>, <em class="sig-param">recreate=False</em>, <em class="sig-param">tblproperties=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Loads a local file into Hive</p>
<p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code>
which isn’t the most efficient serialization format. If a
large amount of data is loaded and/or if the table gets
queried considerably, you may want to use this operator only to
stage the data into a temporary table before loading it into its
final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>filepath</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – local filepath of the file to load</p></li>
<li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – target Hive table, use dot notation to target a
specific database</p></li>
<li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – field delimiter in the file</p></li>
<li><p><strong>field_dict</strong> (<a class="reference external" href="https://docs.python.org/3/library/collections.html#collections.OrderedDict" title="(in Python v3.7)"><em>collections.OrderedDict</em></a>) – A dictionary of the field names in the file
as keys and their Hive types as values.
Note that it must be OrderedDict so as to keep columns’ order.</p></li>
<li><p><strong>create</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.7)"><em>bool</em></a>) – whether to create the table if it doesn’t exist</p></li>
<li><p><strong>overwrite</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.7)"><em>bool</em></a>) – whether to overwrite the data in table or partition</p></li>
<li><p><strong>partition</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – target partition as a dict of partition columns
and values</p></li>
<li><p><strong>recreate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.7)"><em>bool</em></a>) – whether to drop and recreate the table at every
execution</p></li>
<li><p><strong>tblproperties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – TBLPROPERTIES of the hive table being created</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveCliHook.kill">
<code class="sig-name descname">kill</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.kill" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook">
<em class="property">class </em><code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HiveMetastoreHook</code><span class="sig-paren">(</span><em class="sig-param">metastore_conn_id='metastore_default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="../base_hook/index.html#airflow.hooks.base_hook.BaseHook" title="airflow.hooks.base_hook.BaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></a></p>
<p>Wrapper to interact with the Hive Metastore</p>
<dl class="attribute">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.MAX_PART_COUNT">
<code class="sig-name descname">MAX_PART_COUNT</code><em class="property"> = 32767</em><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.MAX_PART_COUNT"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.MAX_PART_COUNT" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.__getstate__">
<code class="sig-name descname">__getstate__</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.__getstate__"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.__getstate__" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.__setstate__">
<code class="sig-name descname">__setstate__</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">d</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.__setstate__"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.__setstate__" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_metastore_client">
<code class="sig-name descname">get_metastore_client</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_metastore_client"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_metastore_client" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Hive thrift client.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_conn">
<code class="sig-name descname">get_conn</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_conn" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_partition">
<code class="sig-name descname">check_for_partition</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table</em>, <em class="sig-param">partition</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.check_for_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_partition" title="Permalink to this definition"></a></dt>
<dd><p>Checks whether a partition exists</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – Name of hive schema (database) &#64;table belongs to</p></li>
<li><p><strong>table</strong> – Name of hive table &#64;partition belongs to</p></li>
</ul>
</dd>
<dt class="field-even">Partition</dt>
<dd class="field-even"><p>Expression that matches the partitions to check for
(e.g. <cite>a = 'b' AND c = 'd'</cite>)</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.7)">bool</a></p>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">t</span> <span class="o">=</span> <span class="s1">&#39;static_babynames_partitioned&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_partition</span><span class="p">(</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">&quot;ds=&#39;2015-01-01&#39;&quot;</span><span class="p">)</span>
<span class="go">True</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition">
<code class="sig-name descname">check_for_named_partition</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table</em>, <em class="sig-param">partition_name</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.check_for_named_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition" title="Permalink to this definition"></a></dt>
<dd><p>Checks whether a partition with a given name exists</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – Name of hive schema (database) &#64;table belongs to</p></li>
<li><p><strong>table</strong> – Name of hive table &#64;partition belongs to</p></li>
</ul>
</dd>
<dt class="field-even">Partition</dt>
<dd class="field-even"><p>Name of the partitions to check for (eg <cite>a=b/c=d</cite>)</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.7)">bool</a></p>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">t</span> <span class="o">=</span> <span class="s1">&#39;static_babynames_partitioned&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_named_partition</span><span class="p">(</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">&quot;ds=2015-01-01&quot;</span><span class="p">)</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_named_partition</span><span class="p">(</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">&quot;ds=xxx&quot;</span><span class="p">)</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_table">
<code class="sig-name descname">get_table</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">table_name</em>, <em class="sig-param">db='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_table"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_table" title="Permalink to this definition"></a></dt>
<dd><p>Get a metastore table object</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">t</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_table</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">&#39;static_babynames&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">t</span><span class="o">.</span><span class="n">tableName</span>
<span class="go">&#39;static_babynames&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="p">[</span><span class="n">col</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">sd</span><span class="o">.</span><span class="n">cols</span><span class="p">]</span>
<span class="go">[&#39;state&#39;, &#39;year&#39;, &#39;name&#39;, &#39;gender&#39;, &#39;num&#39;]</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_tables">
<code class="sig-name descname">get_tables</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">db</em>, <em class="sig-param">pattern='*'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_tables"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_tables" title="Permalink to this definition"></a></dt>
<dd><p>Get the metastore table objects in a database whose names match a pattern</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_databases">
<code class="sig-name descname">get_databases</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">pattern='*'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_databases"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_databases" title="Permalink to this definition"></a></dt>
<dd><p>Get the metastore databases whose names match a pattern</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_partitions">
<code class="sig-name descname">get_partitions</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table_name</em>, <em class="sig-param">filter=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_partitions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_partitions" title="Permalink to this definition"></a></dt>
<dd><p>Returns a list of all partitions in a table. Works only
for tables with fewer than 32767 partitions (Java short max value).
For subpartitioned tables, the number might easily exceed this.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">t</span> <span class="o">=</span> <span class="s1">&#39;static_babynames_partitioned&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parts</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_partitions</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">t</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">len</span><span class="p">(</span><span class="n">parts</span><span class="p">)</span>
<span class="go">1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parts</span>
<span class="go">[{&#39;ds&#39;: &#39;2015-01-01&#39;}]</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook._get_max_partition_from_part_specs">
<em class="property">static </em><code class="sig-name descname">_get_max_partition_from_part_specs</code><span class="sig-paren">(</span><em class="sig-param">part_specs</em>, <em class="sig-param">partition_key</em>, <em class="sig-param">filter_map</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook._get_max_partition_from_part_specs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook._get_max_partition_from_part_specs" title="Permalink to this definition"></a></dt>
<dd><p>Helper method to get max partition of partitions with partition_key
from part specs. key:value pair in filter_map will be used to
filter out partitions.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>part_specs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.7)"><em>list</em></a>) – list of partition specs.</p></li>
<li><p><strong>partition_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – partition key name.</p></li>
<li><p><strong>filter_map</strong> (<em>map</em>) – partition_key:partition_value map used for partition filtering,
e.g. {'key1': 'value1', 'key2': 'value2'}.
Only partitions matching all partition_key:partition_value
pairs will be considered as candidates of max partition.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Max partition or None if part_specs is empty.</p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.max_partition">
<code class="sig-name descname">max_partition</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table_name</em>, <em class="sig-param">field=None</em>, <em class="sig-param">filter_map=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.max_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.max_partition" title="Permalink to this definition"></a></dt>
<dd><p>Returns the maximum value for all partitions with given field in a table.
If only one partition key exists in the table, the key will be used as field.
filter_map should be a partition_key:partition_value map and will be used to
filter out partitions.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – schema name.</p></li>
<li><p><strong>table_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – table name.</p></li>
<li><p><strong>field</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – partition key to get max partition from.</p></li>
<li><p><strong>filter_map</strong> (<em>map</em>) – partition_key:partition_value map used for partition filtering.</p></li>
</ul>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">filter_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ds&#39;</span><span class="p">:</span> <span class="s1">&#39;2015-01-01&#39;</span><span class="p">,</span> <span class="s1">&#39;ds&#39;</span><span class="p">:</span> <span class="s1">&#39;2014-01-01&#39;</span><span class="p">}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">t</span> <span class="o">=</span> <span class="s1">&#39;static_babynames_partitioned&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">max_partition</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="o">...</span> <span class="n">table_name</span><span class="o">=</span><span class="n">t</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="s1">&#39;ds&#39;</span><span class="p">,</span> <span class="n">filter_map</span><span class="o">=</span><span class="n">filter_map</span><span class="p">)</span>
<span class="go">&#39;2015-01-01&#39;</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.table_exists">
<code class="sig-name descname">table_exists</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">table_name</em>, <em class="sig-param">db='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.table_exists" title="Permalink to this definition"></a></dt>
<dd><p>Check if table exists</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">&#39;static_babynames&#39;</span><span class="p">)</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">&#39;airflow&#39;</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">&#39;does_not_exist&#39;</span><span class="p">)</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook">
<em class="property">class </em><code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HiveServer2Hook</code><span class="sig-paren">(</span><em class="sig-param">hiveserver2_conn_id='hiveserver2_default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="../base_hook/index.html#airflow.hooks.base_hook.BaseHook" title="airflow.hooks.base_hook.BaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></a></p>
<p>Wrapper around the pyhive library</p>
<p>Notes:</p>
<ul class="simple">
<li><p>the default authMechanism is PLAIN; to override it you
can specify it in the <code class="docutils literal notranslate"><span class="pre">extra</span></code> of your connection in the UI</p></li>
<li><p>the default for run_set_variable_statements is true; if you
are using impala you may need to set it to false in the
<code class="docutils literal notranslate"><span class="pre">extra</span></code> of your connection in the UI</p></li>
</ul>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_conn">
<code class="sig-name descname">get_conn</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Hive connection object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook._get_results">
<code class="sig-name descname">_get_results</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em>, <em class="sig-param">fetch_size=None</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook._get_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook._get_results" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_results">
<code class="sig-name descname">get_results</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em>, <em class="sig-param">fetch_size=None</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_results" title="Permalink to this definition"></a></dt>
<dd><p>Get results of the provided hql in target schema.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.7)"><em>list</em></a>) – hql to be executed.</p></li>
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – target schema, default to ‘default’.</p></li>
<li><p><strong>fetch_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.7)"><em>int</em></a>) – max size of result to fetch.</p></li>
<li><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – hive_conf to execute along with the hql.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>results of hql execution, dict with data (list of results) and header</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)">dict</a></p>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook.to_csv">
<code class="sig-name descname">to_csv</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">csv_filepath</em>, <em class="sig-param">schema='default'</em>, <em class="sig-param">delimiter=','</em>, <em class="sig-param">lineterminator='\r\n'</em>, <em class="sig-param">output_header=True</em>, <em class="sig-param">fetch_size=1000</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.to_csv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.to_csv" title="Permalink to this definition"></a></dt>
<dd><p>Execute hql in target schema and write results to a csv file.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.7)"><em>list</em></a>) – hql to be executed.</p></li>
<li><p><strong>csv_filepath</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – filepath of csv to write results into.</p></li>
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – target schema, default to ‘default’.</p></li>
<li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – delimiter of the csv file, default to ‘,’.</p></li>
<li><p><strong>lineterminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – lineterminator of the csv file.</p></li>
<li><p><strong>output_header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.7)"><em>bool</em></a>) – header of the csv file, default to True.</p></li>
<li><p><strong>fetch_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.7)"><em>int</em></a>) – number of result rows to write into the csv file, default to 1000.</p></li>
<li><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – hive_conf to execute along with the hql.</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_records">
<code class="sig-name descname">get_records</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_records" title="Permalink to this definition"></a></dt>
<dd><p>Get a set of records from a Hive query.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.7)"><em>list</em></a>) – hql to be executed.</p></li>
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – target schema, default to ‘default’.</p></li>
<li><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.7)"><em>dict</em></a>) – hive_conf to execute along with the hql.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>result of hive execution</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.7)">list</a></p>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveServer2Hook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">&quot;SELECT * FROM airflow.static_babynames LIMIT 100&quot;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">len</span><span class="p">(</span><span class="n">hh</span><span class="o">.</span><span class="n">get_records</span><span class="p">(</span><span class="n">sql</span><span class="p">))</span>
<span class="go">100</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_pandas_df">
<code class="sig-name descname">get_pandas_df</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_pandas_df" title="Permalink to this definition"></a></dt>
<dd><p>Get a pandas dataframe from a Hive query</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.7)"><em>list</em></a>) – hql to be executed.</p></li>
<li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.7)"><em>str</em></a>) – target schema, default to ‘default’.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>result of hql execution</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>DataFrame</p>
</dd>
</dl>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveServer2Hook</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">&quot;SELECT * FROM airflow.static_babynames LIMIT 100&quot;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_pandas_df</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="p">)</span>
<span class="go">100</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>pandas.DataFrame</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../http_hook/index.html" class="btn btn-neutral float-right" title="airflow.hooks.http_hook" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="../hdfs_hook/index.html" class="btn btn-neutral float-left" title="airflow.hooks.hdfs_hook" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>