| |
| |
| <!-- |
| Javascript to render AIRFLOW-XXX and PR references in text |
| as HTML links. |
| |
| Overrides extrahead block from sphinx_rtd_theme |
| https://www.sphinx-doc.org/en/master/templating.html |
| --> |
| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>airflow.hooks.hive_hooks — Airflow Documentation</title> |
| |
| |
| |
| |
| <link rel="shortcut icon" href="../../../../_static/pin_32.png"/> |
| |
| |
| |
| |
| |
| <script type="text/javascript" src="../../../../_static/js/modernizr.min.js"></script> |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script> |
| <script type="text/javascript" src="../../../../_static/jquery.js"></script> |
| <script type="text/javascript" src="../../../../_static/underscore.js"></script> |
| <script type="text/javascript" src="../../../../_static/doctools.js"></script> |
| <script type="text/javascript" src="../../../../_static/language_data.js"></script> |
| <script type="text/javascript" src="../../../../_static/jira-links.js"></script> |
| |
| <script type="text/javascript" src="../../../../_static/js/theme.js"></script> |
| |
| |
| |
| |
| <link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" /> |
| <link rel="stylesheet" href="../../../../_static/graphviz.css" type="text/css" /> |
| <link rel="stylesheet" href="../../../../_static/exampleinclude.css" type="text/css" /> |
| <link rel="index" title="Index" href="../../../../genindex.html" /> |
| <link rel="search" title="Search" href="../../../../search.html" /> |
| <link rel="next" title="airflow.hooks.http_hook" href="../http_hook/index.html" /> |
| <link rel="prev" title="airflow.hooks.hdfs_hook" href="../hdfs_hook/index.html" /> |
| |
| <script> |
| </script> |
| <style> |
| |
| </style> |
| |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="../../../../index.html" class="icon icon-home"> Airflow |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| <div class="version"> |
| 1.10.8 |
| </div> |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../project.html">Project</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../license.html">License</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../start.html">Quick Start</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../tutorial.html">Tutorial</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../howto/index.html">How-to Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../ui.html">UI / Screenshots</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../concepts.html">Concepts</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../profiling.html">Data Profiling</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../cli.html">Command Line Interface Reference</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../scheduler.html">Scheduling &amp; Triggers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../executor/index.html">Executor</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../plugins.html">Plugins</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../security.html">Security</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../timezone.html">Time zones</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../api.html">REST API Reference</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../integration.html">Integration</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../metrics.html">Metrics</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../errors.html">Error Tracking</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../kubernetes.html">Kubernetes</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../lineage.html">Lineage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../dag-serialization.html">DAG Serialization</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../changelog.html">Changelog</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../best-practices.html">Best Practices</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../faq.html">FAQ</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../macros.html">Macros reference</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../privacy_notice.html">Privacy Notice</a></li> |
| </ul> |
| <p class="caption"><span class="caption-text">References</span></p> |
| <ul class="current"> |
| <li class="toctree-l1 current"><a class="reference internal" href="../../../index.html">Python API</a><ul class="current"> |
| <li class="toctree-l2"><a class="reference internal" href="../../../index.html#operators">Operators</a></li> |
| <li class="toctree-l2 current"><a class="reference internal" href="../../../index.html#hooks">Hooks</a><ul class="current"> |
| <li class="toctree-l3 current"><a class="reference internal" href="../../../index.html#hooks-packages">Hooks packages</a><ul class="current"> |
| <li class="toctree-l4 current"><a class="reference internal" href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="../../contrib/hooks/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.hooks</span></code></a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../index.html#executors">Executors</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../index.html#models">Models</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../index.html#core-and-community-package">Core and community package</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../configurations-ref.html">Configurations</a></li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../../../../index.html">Airflow</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="../../../../index.html">Docs</a> »</li> |
| |
| <li><a href="../../../index.html">Python API Reference</a> »</li> |
| |
| <li><a href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks</span></code></a> »</li> |
| |
| <li><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks.hive_hooks</span></code></li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="../../../../_sources/_api/airflow/hooks/hive_hooks/index.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="module-airflow.hooks.hive_hooks"> |
| <span id="airflow-hooks-hive-hooks"></span><h1><a class="reference internal" href="#module-airflow.hooks.hive_hooks" title="airflow.hooks.hive_hooks"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.hooks.hive_hooks</span></code></a><a class="headerlink" href="#module-airflow.hooks.hive_hooks" title="Permalink to this headline">¶</a></h1> |
| <div class="section" id="module-contents"> |
| <h2>Module Contents<a class="headerlink" href="#module-contents" title="Permalink to this headline">¶</a></h2> |
| <dl class="data"> |
| <dt id="airflow.hooks.hive_hooks.HIVE_QUEUE_PRIORITIES"> |
| <code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HIVE_QUEUE_PRIORITIES</code><em class="property"> = ['VERY_HIGH', 'HIGH', 'NORMAL', 'LOW', 'VERY_LOW']</em><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HIVE_QUEUE_PRIORITIES"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HIVE_QUEUE_PRIORITIES" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.hooks.hive_hooks.get_context_from_env_var"> |
| <code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">get_context_from_env_var</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#get_context_from_env_var"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.get_context_from_env_var" title="Permalink to this definition">¶</a></dt> |
| <dt> |
| <code class="sig-name descname">Extract context from env variable, e.g. dag_id, task_id and execution_date,</code></dt> |
| <dt> |
| <code class="sig-name descname">so that they can be used inside BashOperator and PythonOperator.</code></dt> |
| <dd><dl class="field-list simple"> |
| <dt class="field-odd">Returns</dt> |
| <dd class="field-odd"><p>The context of interest.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HiveCliHook</code><span class="sig-paren">(</span><em class="sig-param">hive_cli_conn_id='hive_cli_default'</em>, <em class="sig-param">run_as=None</em>, <em class="sig-param">mapred_queue=None</em>, <em class="sig-param">mapred_queue_priority=None</em>, <em class="sig-param">mapred_job_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="../base_hook/index.html#airflow.hooks.base_hook.BaseHook" title="airflow.hooks.base_hook.BaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></a></p> |
| <p>Simple wrapper around the hive CLI.</p> |
| <p>It also supports the <code class="docutils literal notranslate"><span class="pre">beeline</span></code>, |
| a lighter CLI that runs JDBC and is replacing the heavier |
| traditional CLI. To enable <code class="docutils literal notranslate"><span class="pre">beeline</span></code>, set the use_beeline param in the |
| extra field of your connection as in <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"use_beeline":</span> <span class="pre">true</span> <span class="pre">}</span></code></p> |
| <p>Note that you can also set default hive CLI parameters using the |
| <code class="docutils literal notranslate"><span class="pre">hive_cli_params</span></code> to be used in your connection as in |
| <code class="docutils literal notranslate"><span class="pre">{"hive_cli_params":</span> <span class="pre">"-hiveconf</span> <span class="pre">mapred.job.tracker=some.jobtracker:444"}</span></code> |
| Parameters passed here can be overridden by run_cli’s hive_conf param</p> |
| <p>The extra connection parameter <code class="docutils literal notranslate"><span class="pre">auth</span></code> gets passed in the <code class="docutils literal notranslate"><span class="pre">jdbc</span></code> |
| connection string as is.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>mapred_queue</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – queue used by the Hadoop Scheduler (Capacity or Fair)</p></li> |
| <li><p><strong>mapred_queue_priority</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – priority within the job queue. |
| Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW</p></li> |
| <li><p><strong>mapred_job_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – This name will appear in the jobtracker. |
| This can make monitoring easier.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook._get_proxy_user"> |
| <code class="sig-name descname">_get_proxy_user</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook._get_proxy_user"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook._get_proxy_user" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This function sets the proper proxy_user value in case the user overwrites the default.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook._prepare_cli_cmd"> |
| <code class="sig-name descname">_prepare_cli_cmd</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook._prepare_cli_cmd"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook._prepare_cli_cmd" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This function creates the command list from available information</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook._prepare_hiveconf"> |
| <em class="property">static </em><code class="sig-name descname">_prepare_hiveconf</code><span class="sig-paren">(</span><em class="sig-param">d</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook._prepare_hiveconf"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook._prepare_hiveconf" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This function prepares a list of hiveconf params |
| from a dictionary of key value pairs.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><p><strong>d</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – </p> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveCliHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">hive_conf</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"hive.exec.dynamic.partition"</span><span class="p">:</span> <span class="s2">"true"</span><span class="p">,</span> |
| <span class="gp">... </span><span class="s2">"hive.exec.dynamic.partition.mode"</span><span class="p">:</span> <span class="s2">"nonstrict"</span><span class="p">}</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">_prepare_hiveconf</span><span class="p">(</span><span class="n">hive_conf</span><span class="p">)</span> |
| <span class="go">["-hiveconf", "hive.exec.dynamic.partition=true", "-hiveconf", "hive.exec.dynamic.partition.mode=nonstrict"]</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.run_cli"> |
| <code class="sig-name descname">run_cli</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema=None</em>, <em class="sig-param">verbose=True</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.run_cli"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.run_cli" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run an hql statement using the hive cli. If hive_conf is specified |
| it should be a dict and the entries will be set as key/value pairs |
| in HiveConf</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – if specified these key value pairs will be passed |
| to hive as <code class="docutils literal notranslate"><span class="pre">-hiveconf</span> <span class="pre">"key"="value"</span></code>. Note that they will be |
| passed after the <code class="docutils literal notranslate"><span class="pre">hive_cli_params</span></code> and thus will override |
| whatever values are specified in the database.</p> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveCliHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">result</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">run_cli</span><span class="p">(</span><span class="s2">"USE airflow;"</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="p">(</span><span class="s2">"OK"</span> <span class="ow">in</span> <span class="n">result</span><span class="p">)</span> |
| <span class="go">True</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.test_hql"> |
| <code class="sig-name descname">test_hql</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.test_hql"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.test_hql" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Test an hql statement using the hive cli and EXPLAIN</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.load_df"> |
| <code class="sig-name descname">load_df</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">df</em>, <em class="sig-param">table</em>, <em class="sig-param">field_dict=None</em>, <em class="sig-param">delimiter='</em>, <em class="sig-param">'</em>, <em class="sig-param">encoding='utf8'</em>, <em class="sig-param">pandas_kwargs=None</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.load_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.load_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a pandas DataFrame into hive.</p> |
| <p>Hive data types will be inferred if not passed but column names will |
| not be sanitized.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>df</strong> (<a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html#pandas.DataFrame" title="(in pandas v1.0.1)"><em>pandas.DataFrame</em></a>) – DataFrame to load into a Hive table</p></li> |
| <li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – target Hive table, use dot notation to target a |
| specific database</p></li> |
| <li><p><strong>field_dict</strong> (<a class="reference external" href="https://docs.python.org/3/library/collections.html#collections.OrderedDict" title="(in Python v3.8)"><em>collections.OrderedDict</em></a>) – mapping from column name to hive data type. |
| Note that it must be OrderedDict so as to keep columns’ order.</p></li> |
| <li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – field delimiter in the file</p></li> |
| <li><p><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – str encoding to use when writing DataFrame to file</p></li> |
| <li><p><strong>pandas_kwargs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – passed to DataFrame.to_csv</p></li> |
| <li><p><strong>kwargs</strong> – passed to self.load_file</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.load_file"> |
| <code class="sig-name descname">load_file</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">filepath</em>, <em class="sig-param">table</em>, <em class="sig-param">delimiter='</em>, <em class="sig-param">'</em>, <em class="sig-param">field_dict=None</em>, <em class="sig-param">create=True</em>, <em class="sig-param">overwrite=True</em>, <em class="sig-param">partition=None</em>, <em class="sig-param">recreate=False</em>, <em class="sig-param">tblproperties=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a local file into Hive</p> |
| <p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>filepath</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – local filepath of the file to load</p></li> |
| <li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – target Hive table, use dot notation to target a |
| specific database</p></li> |
| <li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – field delimiter in the file</p></li> |
| <li><p><strong>field_dict</strong> (<a class="reference external" href="https://docs.python.org/3/library/collections.html#collections.OrderedDict" title="(in Python v3.8)"><em>collections.OrderedDict</em></a>) – A dictionary of the field names in the file |
| as keys and their Hive types as values. |
| Note that it must be OrderedDict so as to keep columns’ order.</p></li> |
| <li><p><strong>create</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – whether to create the table if it doesn’t exist</p></li> |
| <li><p><strong>overwrite</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – whether to overwrite the data in table or partition</p></li> |
| <li><p><strong>partition</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – target partition as a dict of partition columns |
| and values</p></li> |
| <li><p><strong>recreate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – whether to drop and recreate the table at every |
| execution</p></li> |
| <li><p><strong>tblproperties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – TBLPROPERTIES of the hive table being created</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.kill"> |
| <code class="sig-name descname">kill</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveCliHook.kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.kill" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HiveMetastoreHook</code><span class="sig-paren">(</span><em class="sig-param">metastore_conn_id='metastore_default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="../base_hook/index.html#airflow.hooks.base_hook.BaseHook" title="airflow.hooks.base_hook.BaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></a></p> |
| <p>Wrapper to interact with the Hive Metastore</p> |
| <dl class="attribute"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.MAX_PART_COUNT"> |
| <code class="sig-name descname">MAX_PART_COUNT</code><em class="property"> = 32767</em><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.MAX_PART_COUNT"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.MAX_PART_COUNT" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.__getstate__"> |
| <code class="sig-name descname">__getstate__</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.__getstate__"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.__getstate__" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.__setstate__"> |
| <code class="sig-name descname">__setstate__</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">d</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.__setstate__"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.__setstate__" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_metastore_client"> |
| <code class="sig-name descname">get_metastore_client</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_metastore_client"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_metastore_client" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Hive thrift client.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook._find_valid_server"> |
| <code class="sig-name descname">_find_valid_server</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook._find_valid_server"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook._find_valid_server" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_conn"> |
| <code class="sig-name descname">get_conn</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_partition"> |
| <code class="sig-name descname">check_for_partition</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table</em>, <em class="sig-param">partition</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.check_for_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a partition exists</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Name of hive schema (database) @table belongs to</p></li> |
| <li><p><strong>table</strong> – Name of hive table @partition belongs to</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Partition</dt> |
| <dd class="field-even"><p>Expression that matches the partitions to check for |
| (eg <cite>a = ‘b’ AND c = ‘d’</cite>)</p> |
| </dd> |
| <dt class="field-odd">Return type</dt> |
| <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)">bool</a></p> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds='2015-01-01'"</span><span class="p">)</span> |
| <span class="go">True</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition"> |
| <code class="sig-name descname">check_for_named_partition</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table</em>, <em class="sig-param">partition_name</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.check_for_named_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a partition with a given name exists</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Name of hive schema (database) @table belongs to</p></li> |
| <li><p><strong>table</strong> – Name of hive table @partition belongs to</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Partition</dt> |
| <dd class="field-even"><p>Name of the partitions to check for (eg <cite>a=b/c=d</cite>)</p> |
| </dd> |
| <dt class="field-odd">Return type</dt> |
| <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)">bool</a></p> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_named_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds=2015-01-01"</span><span class="p">)</span> |
| <span class="go">True</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_named_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds=xxx"</span><span class="p">)</span> |
| <span class="go">False</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_table"> |
| <code class="sig-name descname">get_table</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">table_name</em>, <em class="sig-param">db='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_table"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_table" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a metastore table object</p> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_table</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">'static_babynames'</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">t</span><span class="o">.</span><span class="n">tableName</span> |
| <span class="go">'static_babynames'</span> |
| <span class="gp">>>> </span><span class="p">[</span><span class="n">col</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">sd</span><span class="o">.</span><span class="n">cols</span><span class="p">]</span> |
| <span class="go">['state', 'year', 'name', 'gender', 'num']</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_tables"> |
| <code class="sig-name descname">get_tables</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">db</em>, <em class="sig-param">pattern='*'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_tables"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_tables" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the metastore table objects in database <em>db</em> whose names match <em>pattern</em></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_databases"> |
| <code class="sig-name descname">get_databases</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">pattern='*'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_databases"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_databases" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the metastore databases whose names match <em>pattern</em></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_partitions"> |
| <code class="sig-name descname">get_partitions</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table_name</em>, <em class="sig-param">filter=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_partitions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_partitions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of all partitions in a table. Works only |
| for tables with fewer than 32767 partitions (java short max val). |
| For subpartitioned tables, the number might easily exceed this.</p> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">parts</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_partitions</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">t</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">parts</span><span class="p">)</span> |
| <span class="go">1</span> |
| <span class="gp">>>> </span><span class="n">parts</span> |
| <span class="go">[{'ds': '2015-01-01'}]</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook._get_max_partition_from_part_specs"> |
| <em class="property">static </em><code class="sig-name descname">_get_max_partition_from_part_specs</code><span class="sig-paren">(</span><em class="sig-param">part_specs</em>, <em class="sig-param">partition_key</em>, <em class="sig-param">filter_map</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook._get_max_partition_from_part_specs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook._get_max_partition_from_part_specs" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Helper method to get max partition of partitions with partition_key |
| from part specs. key:value pair in filter_map will be used to |
| filter out partitions.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>part_specs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – list of partition specs.</p></li> |
| <li><p><strong>partition_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – partition key name.</p></li> |
| <li><p><strong>filter_map</strong> (<em>map</em>) – partition_key:partition_value map used for partition filtering, |
| e.g. {‘key1’: ‘value1’, ‘key2’: ‘value2’}. |
| Only partitions matching all partition_key:partition_value |
| pairs will be considered as candidates of max partition.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><p>Max partition or None if part_specs is empty.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.max_partition"> |
| <code class="sig-name descname">max_partition</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema</em>, <em class="sig-param">table_name</em>, <em class="sig-param">field=None</em>, <em class="sig-param">filter_map=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.max_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.max_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the maximum value for all partitions with given field in a table. |
| If only one partition key exists in the table, the key will be used as field. |
| filter_map should be a partition_key:partition_value map and will be used to |
| filter out partitions.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – schema name.</p></li> |
| <li><p><strong>table_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – table name.</p></li> |
| <li><p><strong>field</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – partition key to get max partition from.</p></li> |
| <li><p><strong>filter_map</strong> (<em>map</em>) – partition_key:partition_value map used for partition filtering.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">filter_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'ds'</span><span class="p">:</span> <span class="s1">'2015-01-01'</span><span class="p">}</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">max_partition</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">t</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="s1">'ds'</span><span class="p">,</span> <span class="n">filter_map</span><span class="o">=</span><span class="n">filter_map</span><span class="p">)</span> |
| <span class="go">'2015-01-01'</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.table_exists"> |
| <code class="sig-name descname">table_exists</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">table_name</em>, <em class="sig-param">db='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.table_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if table exists</p> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">'static_babynames'</span><span class="p">)</span> |
| <span class="go">True</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">'does_not_exist'</span><span class="p">)</span> |
| <span class="go">False</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.hooks.hive_hooks.</code><code class="sig-name descname">HiveServer2Hook</code><span class="sig-paren">(</span><em class="sig-param">hiveserver2_conn_id='hiveserver2_default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="../base_hook/index.html#airflow.hooks.base_hook.BaseHook" title="airflow.hooks.base_hook.BaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></a></p> |
| <p>Wrapper around the pyhive library</p> |
| <p>Notes:</p> |
| <ul class="simple"> |
| <li><p>the default authMechanism is PLAIN, to override it you |
| can specify it in the <code class="docutils literal notranslate"><span class="pre">extra</span></code> of your connection in the UI</p></li> |
| <li><p>the default for run_set_variable_statements is true, if you |
| are using impala you may need to set it to false in the |
| <code class="docutils literal notranslate"><span class="pre">extra</span></code> of your connection in the UI</p></li> |
| </ul> |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_conn"> |
| <code class="sig-name descname">get_conn</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">schema=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Hive connection object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook._get_results"> |
| <code class="sig-name descname">_get_results</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em>, <em class="sig-param">fetch_size=None</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook._get_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook._get_results" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_results"> |
| <code class="sig-name descname">get_results</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em>, <em class="sig-param">fetch_size=None</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_results" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get results of the provided hql in target schema.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – hql to be executed.</p></li> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – target schema, default to ‘default’.</p></li> |
| <li><p><strong>fetch_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – max size of result to fetch.</p></li> |
| <li><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – hive_conf to execute along with the hql.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><p>results of hql execution, dict with data (list of results) and header</p> |
| </dd> |
| <dt class="field-odd">Return type</dt> |
| <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)">dict</a></p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.to_csv"> |
| <code class="sig-name descname">to_csv</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">csv_filepath</em>, <em class="sig-param">schema='default'</em>, <em class="sig-param">delimiter=','</em>, <em class="sig-param">lineterminator='\r\n'</em>, <em class="sig-param">output_header=True</em>, <em class="sig-param">fetch_size=1000</em>, <em class="sig-param">hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.to_csv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.to_csv" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute hql in target schema and write results to a csv file.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – hql to be executed.</p></li> |
| <li><p><strong>csv_filepath</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – filepath of csv to write results into.</p></li> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – target schema, default to ‘default’.</p></li> |
| <li><p><strong>delimiter</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – delimiter of the csv file, default to ‘,’.</p></li> |
| <li><p><strong>lineterminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – lineterminator of the csv file.</p></li> |
| <li><p><strong>output_header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – header of the csv file, default to True.</p></li> |
| <li><p><strong>fetch_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – number of result rows to write into the csv file, default to 1000.</p></li> |
| <li><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – hive_conf to execute along with the hql.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_records"> |
| <code class="sig-name descname">get_records</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_records" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a set of records from a Hive query.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – hql to be executed.</p></li> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – target schema, default to ‘default’.</p></li> |
| <li><p><strong>hive_conf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – hive_conf to execute along with the hql.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><p>result of hive execution</p> |
| </dd> |
| <dt class="field-odd">Return type</dt> |
| <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)">list</a></p> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveServer2Hook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT * FROM airflow.static_babynames LIMIT 100"</span> |
| <span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">hh</span><span class="o">.</span><span class="n">get_records</span><span class="p">(</span><span class="n">sql</span><span class="p">))</span> |
| <span class="go">100</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_pandas_df"> |
| <code class="sig-name descname">get_pandas_df</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">hql</em>, <em class="sig-param">schema='default'</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a pandas dataframe from a Hive query</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>hql</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – hql to be executed.</p></li> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – target schema, default to ‘default’.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><p>result of hql execution</p> |
| </dd> |
| <dt class="field-odd">Return type</dt> |
| <dd class="field-odd"><p>DataFrame</p> |
| </dd> |
| </dl> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveServer2Hook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT * FROM airflow.static_babynames LIMIT 100"</span> |
| <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_pandas_df</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="p">)</span> |
| <span class="go">100</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns</dt> |
| <dd class="field-odd"><p>pandas.DataFrame</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| |
| |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="../http_hook/index.html" class="btn btn-neutral float-right" title="airflow.hooks.http_hook" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a> |
| |
| |
| <a href="../hdfs_hook/index.html" class="btn btn-neutral float-left" title="airflow.hooks.hdfs_hook" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| |
| </p> |
| </div> |
| Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| <div class="footer">This page uses <a href="https://analytics.google.com/"> |
| Google Analytics</a> to collect statistics. You can disable it by blocking |
| the JavaScript coming from www.google-analytics.com. Check our |
| <a href="../../../../privacy_notice.html">Privacy Policy</a> |
| for more details. |
| </div> |
| |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| <script type="text/javascript"> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| |
| |
| |
| <!-- Theme Analytics --> |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); |
| |
| ga('create', 'UA-140539454-1', 'auto'); |
| ga('send', 'pageview'); |
| </script> |
| |
| |
| |
| |
| </body> |
| </html> |