| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>API Reference — Airflow Documentation</title> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="_static/pygments.css" type="text/css" /> |
| <link rel="index" title="Index" href="genindex.html" /> |
| <link rel="search" title="Search" href="search.html" /> |
| <link rel="prev" title="FAQ" href="faq.html" /> |
| |
| |
| <script src="_static/js/modernizr.min.js"></script> |
| |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search"> |
| |
| |
| |
| <a href="index.html" class="icon icon-home"> Airflow |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| <div class="version"> |
| 1.10.2 |
| </div> |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling & Triggers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="api.html">Experimental Rest API</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="integration.html">Integration</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="metrics.html">Metrics</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="kubernetes.html">Kubernetes</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="changelog.html">Changelog</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li> |
| <li class="toctree-l1 current"><a class="current reference internal" href="#">API Reference</a><ul> |
| <li class="toctree-l2"><a class="reference internal" href="#operators">Operators</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#baseoperator">BaseOperator</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#basesensoroperator">BaseSensorOperator</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#core-operators">Core Operators</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#id1">Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sensors">Sensors</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#community-contributed-operators">Community-contributed Operators</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#id2">Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id9">Sensors</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#macros">Macros</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#default-variables">Default Variables</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#id15">Macros</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#models">Models</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="#hooks">Hooks</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#community-contributed-hooks">Community contributed hooks</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#executors">Executors</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#community-contributed-executors">Community-contributed executors</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="index.html">Airflow</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="index.html">Docs</a> »</li> |
| |
| <li>API Reference</li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="_sources/code.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="api-reference"> |
| <h1>API Reference<a class="headerlink" href="#api-reference" title="Permalink to this headline">¶</a></h1> |
| <div class="section" id="operators"> |
| <span id="api-reference-operators"></span><h2>Operators<a class="headerlink" href="#operators" title="Permalink to this headline">¶</a></h2> |
| <p>Operators allow for generation of certain types of tasks that become nodes in |
| the DAG when instantiated. All operators derive from <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code> and |
| inherit many attributes and methods that way. Refer to the <a class="reference internal" href="#baseoperator">BaseOperator</a> |
| documentation for more details.</p> |
| <p>There are 3 main types of operators:</p> |
| <ul class="simple"> |
| <li>Operators that perform an <strong>action</strong>, or tell another system to |
| perform an action</li> |
| <li><strong>Transfer</strong> operators move data from one system to another</li> |
| <li><strong>Sensors</strong> are a certain type of operator that will keep running until a |
| certain criterion is met. Examples include a specific file landing in HDFS or |
| S3, a partition appearing in Hive, or a specific time of the day. Sensors |
| are derived from <code class="docutils literal notranslate"><span class="pre">BaseSensorOperator</span></code> and run a poke |
| method at a specified <code class="docutils literal notranslate"><span class="pre">poke_interval</span></code> until it returns <code class="docutils literal notranslate"><span class="pre">True</span></code>.</li> |
| </ul> |
| <div class="section" id="baseoperator"> |
| <h3>BaseOperator<a class="headerlink" href="#baseoperator" title="Permalink to this headline">¶</a></h3> |
| <p>All operators are derived from <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code> and acquire much |
| functionality through inheritance. Since this is the core of the engine, |
| it’s worth taking the time to understand the parameters of <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code> |
| to understand the primitive features that can be leveraged in your |
| DAGs.</p> |
| <dl class="class"> |
| <dt id="airflow.models.BaseOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">BaseOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Abstract base class for all operators. Since operators create objects that |
| become nodes in the dag, BaseOperator contains many recursive methods for |
| dag crawling behavior. To derive this class, you are expected to override |
| the constructor as well as the ‘execute’ method.</p> |
| <p>Operators derived from this class should perform or trigger certain tasks |
| synchronously (wait for completion). Example of operators could be an |
| operator that runs a Pig job (PigOperator), a sensor operator that |
| waits for a partition to land in Hive (HiveSensorOperator), or one that |
| moves data from Hive to MySQL (Hive2MySqlOperator). Instances of these |
| operators (tasks) target specific operations, running specific scripts, |
| functions or data transfers.</p> |
| <p>This class is abstract and shouldn’t be instantiated. Instantiating a |
| class derived from this one results in the creation of a task object, |
| which ultimately becomes a node in DAG objects. Task dependencies should |
| be set by using the set_upstream and/or set_downstream methods.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>task_id</strong> (<em>string</em>) – a unique, meaningful id for the task</li> |
| <li><strong>owner</strong> (<em>string</em>) – the owner of the task, using the unix username is recommended</li> |
| <li><strong>retries</strong> (<em>int</em>) – the number of retries that should be performed before |
| failing the task</li> |
| <li><strong>retry_delay</strong> (<em>timedelta</em>) – delay between retries</li> |
| <li><strong>retry_exponential_backoff</strong> (<em>bool</em>) – allow progressive longer waits between |
| retries by using exponential backoff algorithm on retry delay (delay |
| will be converted into seconds)</li> |
| <li><strong>max_retry_delay</strong> (<em>timedelta</em>) – maximum delay interval between retries</li> |
| <li><strong>start_date</strong> (<em>datetime</em>) – The <code class="docutils literal notranslate"><span class="pre">start_date</span></code> for the task, determines |
| the <code class="docutils literal notranslate"><span class="pre">execution_date</span></code> for the first task instance. The best practice |
| is to have the start_date rounded |
| to your DAG’s <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code>. Daily jobs have their start_date |
| some day at 00:00:00, hourly jobs have their start_date at 00:00 |
| of a specific hour. Note that Airflow simply looks at the latest |
| <code class="docutils literal notranslate"><span class="pre">execution_date</span></code> and adds the <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code> to determine |
| the next <code class="docutils literal notranslate"><span class="pre">execution_date</span></code>. It is also very important |
| to note that different tasks’ dependencies |
| need to line up in time. If task A depends on task B and their |
| start_date are offset in a way that their execution_date don’t line |
| up, A’s dependencies will never be met. If you are looking to delay |
| a task, for example running a daily task at 2AM, look into the |
| <code class="docutils literal notranslate"><span class="pre">TimeSensor</span></code> and <code class="docutils literal notranslate"><span class="pre">TimeDeltaSensor</span></code>. We advise against using |
| dynamic <code class="docutils literal notranslate"><span class="pre">start_date</span></code> and recommend using fixed ones. Read the |
| FAQ entry about start_date for more information.</li> |
| <li><strong>end_date</strong> (<em>datetime</em>) – if specified, the scheduler won’t go beyond this date</li> |
| <li><strong>depends_on_past</strong> (<em>bool</em>) – when set to true, task instances will run |
| sequentially while relying on the previous task’s schedule to |
| succeed. The task instance for the start_date is allowed to run.</li> |
| <li><strong>wait_for_downstream</strong> (<em>bool</em>) – when set to true, an instance of task |
| X will wait for tasks immediately downstream of the previous instance |
| of task X to finish successfully before it runs. This is useful if the |
| different instances of a task X alter the same asset, and this asset |
| is used by tasks downstream of task X. Note that depends_on_past |
| is forced to True wherever wait_for_downstream is used.</li> |
| <li><strong>queue</strong> (<em>str</em>) – which queue to target when running this job. Not |
| all executors implement queue management, the CeleryExecutor |
| does support targeting specific queues.</li> |
| <li><strong>dag</strong> (<a class="reference internal" href="#airflow.models.DAG" title="airflow.models.DAG"><em>DAG</em></a>) – a reference to the dag the task is attached to (if any)</li> |
| <li><strong>priority_weight</strong> (<em>int</em>) – priority weight of this task against other tasks. |
| This allows the executor to trigger higher priority tasks before |
| others when things get backed up.</li> |
| <li><strong>weight_rule</strong> (<em>str</em>) – weighting method used for the effective total |
| priority weight of the task. Options are: |
| <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">downstream</span> <span class="pre">|</span> <span class="pre">upstream</span> <span class="pre">|</span> <span class="pre">absolute</span> <span class="pre">}</span></code> default is <code class="docutils literal notranslate"><span class="pre">downstream</span></code> |
| When set to <code class="docutils literal notranslate"><span class="pre">downstream</span></code> the effective weight of the task is the |
| aggregate sum of all downstream descendants. As a result, upstream |
| tasks will have higher weight and will be scheduled more aggressively |
| when using positive weight values. This is useful when you have |
| multiple dag run instances and desire to have all upstream tasks to |
| complete for all runs before each dag can continue processing |
| downstream tasks. When set to <code class="docutils literal notranslate"><span class="pre">upstream</span></code> the effective weight is the |
| aggregate sum of all upstream ancestors. This is the opposite where |
| downstream tasks have higher weight and will be scheduled more |
| aggressively when using positive weight values. This is useful when you |
| have multiple dag run instances and prefer to have each dag complete |
| before starting upstream tasks of other dags. When set to |
| <code class="docutils literal notranslate"><span class="pre">absolute</span></code>, the effective weight is the exact <code class="docutils literal notranslate"><span class="pre">priority_weight</span></code> |
| specified without additional weighting. You may want to do this when |
| you know exactly what priority weight each task should have. |
| Additionally, when set to <code class="docutils literal notranslate"><span class="pre">absolute</span></code>, there is a bonus effect of |
| significantly speeding up the task creation process for very large |
| DAGs. Options can be set as string or using the constants defined in |
| the static class <code class="docutils literal notranslate"><span class="pre">airflow.utils.WeightRule</span></code></li> |
| <li><strong>pool</strong> (<em>str</em>) – the slot pool this task should run in, slot pools are a |
| way to limit concurrency for certain tasks</li> |
| <li><strong>sla</strong> (<em>datetime.timedelta</em>) – time by which the job is expected to succeed. Note that |
| this represents the <code class="docutils literal notranslate"><span class="pre">timedelta</span></code> after the period is closed. For |
| example if you set an SLA of 1 hour, the scheduler would send an email |
| soon after 1:00AM on the <code class="docutils literal notranslate"><span class="pre">2016-01-02</span></code> if the <code class="docutils literal notranslate"><span class="pre">2016-01-01</span></code> instance |
| has not succeeded yet. |
| The scheduler pays special attention for jobs with an SLA and |
| sends alert |
| emails for sla misses. SLA misses are also recorded in the database |
| for future reference. All tasks that share the same SLA time |
| get bundled in a single email, sent soon after that time. SLA |
| notifications are sent once and only once for each task instance.</li> |
| <li><strong>execution_timeout</strong> (<em>datetime.timedelta</em>) – max time allowed for the execution of |
| this task instance, if it goes beyond it will raise and fail.</li> |
| <li><strong>on_failure_callback</strong> (<em>callable</em>) – a function to be called when a task instance |
| of this task fails. a context dictionary is passed as a single |
| parameter to this function. Context contains references to related |
| objects to the task instance and is documented under the macros |
| section of the API.</li> |
| <li><strong>on_retry_callback</strong> (<em>callable</em>) – much like the <code class="docutils literal notranslate"><span class="pre">on_failure_callback</span></code> except |
| that it is executed when retries occur.</li> |
| <li><strong>on_success_callback</strong> (<em>callable</em>) – much like the <code class="docutils literal notranslate"><span class="pre">on_failure_callback</span></code> except |
| that it is executed when the task succeeds.</li> |
| <li><strong>trigger_rule</strong> (<em>str</em>) – defines the rule by which dependencies are applied |
| for the task to get triggered. Options are: |
| <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">all_success</span> <span class="pre">|</span> <span class="pre">all_failed</span> <span class="pre">|</span> <span class="pre">all_done</span> <span class="pre">|</span> <span class="pre">one_success</span> <span class="pre">|</span> |
| <span class="pre">one_failed</span> <span class="pre">|</span> <span class="pre">none_failed</span> <span class="pre">|</span> <span class="pre">dummy}</span></code> |
| default is <code class="docutils literal notranslate"><span class="pre">all_success</span></code>. Options can be set as string or |
| using the constants defined in the static class |
| <code class="docutils literal notranslate"><span class="pre">airflow.utils.TriggerRule</span></code></li> |
| <li><strong>resources</strong> (<em>dict</em>) – A map of resource parameter names (the argument names of the |
| Resources constructor) to their values.</li> |
| <li><strong>run_as_user</strong> (<em>str</em>) – unix username to impersonate while running the task</li> |
| <li><strong>task_concurrency</strong> (<em>int</em>) – When set, a task will be able to limit the concurrent |
| runs across execution_dates</li> |
| <li><strong>executor_config</strong> (<em>dict</em>) – <p>Additional task-level configuration parameters that are |
| interpreted by a specific executor. Parameters are namespaced by the name of |
| executor.</p> |
| <p><strong>Example</strong>: to run this task in a specific docker container through |
| the KubernetesExecutor</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MyOperator</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> |
| <span class="n">executor_config</span><span class="o">=</span><span class="p">{</span> |
| <span class="s2">"KubernetesExecutor"</span><span class="p">:</span> |
| <span class="p">{</span><span class="s2">"image"</span><span class="p">:</span> <span class="s2">"myCustomDockerImage"</span><span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.clear"> |
| <code class="descname">clear</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.clear"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.clear" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears the state of task instances associated with the task, following |
| the parameters specified.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.BaseOperator.dag"> |
| <code class="descname">dag</code><a class="headerlink" href="#airflow.models.BaseOperator.dag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the Operator’s DAG if set, otherwise raises an error</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.BaseOperator.deps"> |
| <code class="descname">deps</code><a class="headerlink" href="#airflow.models.BaseOperator.deps" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the list of dependencies for the operator. These differ from execution |
| context dependencies in that they are specific to tasks and can be |
| extended/overridden by subclasses.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.BaseOperator.downstream_list"> |
| <code class="descname">downstream_list</code><a class="headerlink" href="#airflow.models.BaseOperator.downstream_list" title="Permalink to this definition">¶</a></dt> |
| <dd><p>@property: list of tasks directly downstream</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.get_direct_relative_ids"> |
| <code class="descname">get_direct_relative_ids</code><span class="sig-paren">(</span><em>upstream=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_direct_relative_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.get_direct_relative_ids" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the direct relative ids to the current task, upstream or |
| downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.get_direct_relatives"> |
| <code class="descname">get_direct_relatives</code><span class="sig-paren">(</span><em>upstream=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_direct_relatives"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.get_direct_relatives" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the direct relatives to the current task, upstream or |
| downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.get_flat_relative_ids"> |
| <code class="descname">get_flat_relative_ids</code><span class="sig-paren">(</span><em>upstream=False</em>, <em>found_descendants=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_flat_relative_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.get_flat_relative_ids" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a flat list of relatives’ ids, either upstream or downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.get_flat_relatives"> |
| <code class="descname">get_flat_relatives</code><span class="sig-paren">(</span><em>upstream=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_flat_relatives"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.get_flat_relatives" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a flat list of relatives, either upstream or downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.get_task_instances"> |
| <code class="descname">get_task_instances</code><span class="sig-paren">(</span><em>session</em>, <em>start_date=None</em>, <em>end_date=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_task_instances"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.get_task_instances" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a set of task instances related to this task for a specific date |
| range.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.has_dag"> |
| <code class="descname">has_dag</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.has_dag"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.has_dag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns True if the Operator has been assigned to a DAG.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.on_kill"> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Override this method to cleanup subprocesses when a task instance |
| gets killed. Any use of the threading, subprocess or multiprocessing |
| module within an operator needs to be cleaned up or it will leave |
| ghost processes behind.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.post_execute"> |
| <code class="descname">post_execute</code><span class="sig-paren">(</span><em>context</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.post_execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.post_execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This hook is triggered right after self.execute() is called. |
| It is passed the execution context and any results returned by the |
| operator.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.pre_execute"> |
| <code class="descname">pre_execute</code><span class="sig-paren">(</span><em>context</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.pre_execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.pre_execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This hook is triggered right before self.execute() is called.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.prepare_template"> |
| <code class="descname">prepare_template</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.prepare_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.prepare_template" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Hook that is triggered after the templated fields get replaced |
| by their content. If you need your operator to alter the |
| content of the file before the template is rendered, |
| it should override this method to do so.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.render_template"> |
| <code class="descname">render_template</code><span class="sig-paren">(</span><em>attr</em>, <em>content</em>, <em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.render_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.render_template" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Renders a template either from a file or directly in a field, and returns |
| the rendered result.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.render_template_from_field"> |
| <code class="descname">render_template_from_field</code><span class="sig-paren">(</span><em>attr</em>, <em>content</em>, <em>context</em>, <em>jinja_env</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.render_template_from_field"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.render_template_from_field" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Renders a template from a field. If the field is a string, it will |
| simply render the string and return the result. If it is a collection or |
| nested set of collections, it will traverse the structure and render |
| all strings in it.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.run"> |
| <code class="descname">run</code><span class="sig-paren">(</span><em>start_date=None</em>, <em>end_date=None</em>, <em>ignore_first_depends_on_past=False</em>, <em>ignore_ti_state=False</em>, <em>mark_success=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.run" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run a set of task instances for a date range.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.BaseOperator.schedule_interval"> |
| <code class="descname">schedule_interval</code><a class="headerlink" href="#airflow.models.BaseOperator.schedule_interval" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The schedule interval of the DAG always wins over individual tasks so |
| that tasks within a DAG always line up. The task still needs a |
| schedule_interval as it may not be attached to a DAG.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.set_downstream"> |
| <code class="descname">set_downstream</code><span class="sig-paren">(</span><em>task_or_task_list</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.set_downstream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.set_downstream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Set a task or a task list to be directly downstream from the current |
| task.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.set_upstream"> |
| <code class="descname">set_upstream</code><span class="sig-paren">(</span><em>task_or_task_list</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.set_upstream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.set_upstream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Set a task or a task list to be directly upstream from the current |
| task.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.BaseOperator.upstream_list"> |
| <code class="descname">upstream_list</code><a class="headerlink" href="#airflow.models.BaseOperator.upstream_list" title="Permalink to this definition">¶</a></dt> |
| <dd><p>@property: list of tasks directly upstream</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.xcom_pull"> |
| <code class="descname">xcom_pull</code><span class="sig-paren">(</span><em>context</em>, <em>task_ids=None</em>, <em>dag_id=None</em>, <em>key=u'return_value'</em>, <em>include_prior_dates=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.xcom_pull"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.xcom_pull" title="Permalink to this definition">¶</a></dt> |
| <dd><p>See TaskInstance.xcom_pull()</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.BaseOperator.xcom_push"> |
| <code class="descname">xcom_push</code><span class="sig-paren">(</span><em>context</em>, <em>key</em>, <em>value</em>, <em>execution_date=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.xcom_push"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.BaseOperator.xcom_push" title="Permalink to this definition">¶</a></dt> |
| <dd><p>See TaskInstance.xcom_push()</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="basesensoroperator"> |
| <h3>BaseSensorOperator<a class="headerlink" href="#basesensoroperator" title="Permalink to this headline">¶</a></h3> |
| <p>All sensors are derived from <code class="docutils literal notranslate"><span class="pre">BaseSensorOperator</span></code>. All sensors inherit |
| the <code class="docutils literal notranslate"><span class="pre">timeout</span></code> and <code class="docutils literal notranslate"><span class="pre">poke_interval</span></code> on top of the <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code> |
| attributes.</p> |
| <dl class="class"> |
| <dt id="airflow.sensors.base_sensor_operator.BaseSensorOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.base_sensor_operator.</code><code class="descname">BaseSensorOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/base_sensor_operator.html#BaseSensorOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.SkipMixin</span></code></p> |
| <p>Sensor operators are derived from this class and inherit these attributes.</p> |
| <p>Sensor operators keep executing at a time interval and succeed when |
| a criteria is met and fail if and when they time out.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>soft_fail</strong> (<em>bool</em>) – Set to true to mark the task as SKIPPED on failure</li> |
| <li><strong>poke_interval</strong> (<em>int</em>) – Time in seconds that the job should wait in |
| between each try</li> |
| <li><strong>timeout</strong> (<em>int</em>) – Time, in seconds before the task times out and fails.</li> |
| <li><strong>mode</strong> (<em>str</em>) – How the sensor operates. |
| Options are: <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">poke</span> <span class="pre">|</span> <span class="pre">reschedule</span> <span class="pre">}</span></code>, default is <code class="docutils literal notranslate"><span class="pre">poke</span></code>. |
| When set to <code class="docutils literal notranslate"><span class="pre">poke</span></code> the sensor is taking up a worker slot for its |
| whole execution time and sleeps between pokes. Use this mode if the |
| expected runtime of the sensor is short or if a short poke interval |
| is required. |
| When set to <code class="docutils literal notranslate"><span class="pre">reschedule</span></code> the sensor task frees the worker slot when |
| the criteria is not yet met and it’s rescheduled at a later time. Use |
| this mode if the expected time until the criteria is met is expected |
| to be quite long. The poke interval should be more than one minute |
| to prevent too much load on |
| the scheduler.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="attribute"> |
| <dt id="airflow.sensors.base_sensor_operator.BaseSensorOperator.deps"> |
| <code class="descname">deps</code><a class="headerlink" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator.deps" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Adds one additional dependency for all sensor operators that |
| checks if a sensor task instance can be rescheduled.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.sensors.base_sensor_operator.BaseSensorOperator.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/base_sensor_operator.html#BaseSensorOperator.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="core-operators"> |
| <h3>Core Operators<a class="headerlink" href="#core-operators" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="id1"> |
| <h4>Operators<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.bash_operator.BashOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.bash_operator.</code><code class="descname">BashOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/bash_operator.html#BashOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.bash_operator.BashOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a Bash script, command or set of commands.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bash_command</strong> (<em>string</em>) – The command, set of commands or reference to a |
| bash script (must be ‘.sh’) to be executed. (templated)</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – If xcom_push is True, the last line written to stdout |
| will also be pushed to an XCom when the bash command completes.</li> |
| <li><strong>env</strong> (<em>dict</em>) – If env is not None, it must be a mapping that defines the |
| environment variables for the new process; these are used instead |
| of inheriting the current process environment, which is the default |
| behavior. (templated)</li> |
| <li><strong>output_encoding</strong> (<em>str</em>) – Output encoding of bash command</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.bash_operator.BashOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/bash_operator.html#BashOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.bash_operator.BashOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the bash command in a temporary directory |
| which will be cleaned afterwards</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.python_operator.BranchPythonOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.python_operator.</code><code class="descname">BranchPythonOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/python_operator.html#BranchPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.python_operator.BranchPythonOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.python_operator.PythonOperator" title="airflow.operators.python_operator.PythonOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.python_operator.PythonOperator</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.SkipMixin</span></code></p> |
| <p>Allows a workflow to “branch” or follow a single path following the |
| execution of this task.</p> |
| <p>It derives the PythonOperator and expects a Python function that returns |
| the task_id to follow. The task_id returned should point to a task |
| directly downstream from {self}. All other “branches” or |
| directly downstream tasks are marked with a state of <code class="docutils literal notranslate"><span class="pre">skipped</span></code> so that |
| these paths can’t move forward. The <code class="docutils literal notranslate"><span class="pre">skipped</span></code> states are propagated |
| downstream to allow for the DAG state to fill up and the DAG run’s state |
| to be inferred.</p> |
| <p>Note that using tasks with <code class="docutils literal notranslate"><span class="pre">depends_on_past=True</span></code> downstream from |
| <code class="docutils literal notranslate"><span class="pre">BranchPythonOperator</span></code> is logically unsound as <code class="docutils literal notranslate"><span class="pre">skipped</span></code> status |
| will invariably lead to block tasks that depend on their past successes. |
| <code class="docutils literal notranslate"><span class="pre">skipped</span></code> states propagate where all directly upstream tasks are |
| <code class="docutils literal notranslate"><span class="pre">skipped</span></code>.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.check_operator.CheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.check_operator.</code><code class="descname">CheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/check_operator.html#CheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.check_operator.CheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Performs checks against a db. The <code class="docutils literal notranslate"><span class="pre">CheckOperator</span></code> expects |
| a sql query that will return a single row. Each value on that |
| first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could, |
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average.</p> |
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing from |
| publishing dubious data, or on the side and receive email alerts |
| without stopping the progress of the DAG.</p> |
| <p>Note that this is an abstract class and get_db_hook |
| needs to be defined. The get_db_hook is a hook that gets a |
| single record from an external source.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>sql</strong> (<em>string</em>) – the sql to be executed. (templated)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.dummy_operator.DummyOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.dummy_operator.</code><code class="descname">DummyOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/dummy_operator.html#DummyOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.dummy_operator.DummyOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator that does literally nothing. It can be used to group tasks in a |
| DAG.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.druid_check_operator.DruidCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.druid_check_operator.</code><code class="descname">DruidCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/druid_check_operator.html#DruidCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.druid_check_operator.DruidCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p> |
| <p>Performs checks against Druid. The <code class="docutils literal notranslate"><span class="pre">DruidCheckOperator</span></code> expects |
| a sql query that will return a single row. Each value on that |
| first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could, |
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average. |
| This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing from |
| publishing dubious data, or on the side and receive email alerts |
| without stopping the progress of the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>druid_broker_conn_id</strong> (<em>string</em>) – reference to the druid broker</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.druid_check_operator.DruidCheckOperator.get_db_hook"> |
| <code class="descname">get_db_hook</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/druid_check_operator.html#DruidCheckOperator.get_db_hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.druid_check_operator.DruidCheckOperator.get_db_hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the druid db api hook.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.operators.druid_check_operator.DruidCheckOperator.get_first"> |
| <code class="descname">get_first</code><span class="sig-paren">(</span><em>sql</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/druid_check_operator.html#DruidCheckOperator.get_first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.druid_check_operator.DruidCheckOperator.get_first" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the druid sql to druid broker and returns the first resulting row.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>sql</strong> (<em>str</em>) – the sql statement to be executed (str)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.email_operator.EmailOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.email_operator.</code><code class="descname">EmailOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/email_operator.html#EmailOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.email_operator.EmailOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Sends an email.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>to</strong> (<em>list</em><em> or </em><em>string</em><em> (</em><em>comma</em><em> or </em><em>semicolon delimited</em><em>)</em>) – list of emails to send the email to. (templated)</li> |
| <li><strong>subject</strong> (<em>string</em>) – subject line for the email. (templated)</li> |
| <li><strong>html_content</strong> (<em>string</em>) – content of the email, html markup |
| is allowed. (templated)</li> |
| <li><strong>files</strong> (<em>list</em>) – file names to attach in email</li> |
| <li><strong>cc</strong> (<em>list</em><em> or </em><em>string</em><em> (</em><em>comma</em><em> or </em><em>semicolon delimited</em><em>)</em>) – list of recipients to be added in CC field</li> |
| <li><strong>bcc</strong> (<em>list</em><em> or </em><em>string</em><em> (</em><em>comma</em><em> or </em><em>semicolon delimited</em><em>)</em>) – list of recipients to be added in BCC field</li> |
| <li><strong>mime_subtype</strong> (<em>string</em>) – MIME sub content type</li> |
| <li><strong>mime_charset</strong> (<em>string</em>) – character set parameter added to the Content-Type |
| header.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.generic_transfer.GenericTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.generic_transfer.</code><code class="descname">GenericTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/generic_transfer.html#GenericTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.generic_transfer.GenericTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from a connection to another, assuming that they both |
| provide the required methods in their respective hooks. The source hook |
| needs to expose a <cite>get_records</cite> method, and the destination a |
| <cite>insert_rows</cite> method.</p> |
| <p>This is meant to be used on small-ish datasets that fit in memory.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against the source database. (templated)</li> |
| <li><strong>destination_table</strong> (<em>str</em>) – target table. (templated)</li> |
| <li><strong>source_conn_id</strong> (<em>str</em>) – source connection</li> |
| <li><strong>destination_conn_id</strong> (<em>str</em>) – destination connection</li> |
| <li><strong>preoperator</strong> (<em>str</em><em> or </em><em>list of str</em>) – sql statement or list of statements to be |
| executed prior to loading the data. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.hive_to_druid.HiveToDruidTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.hive_to_druid.</code><code class="descname">HiveToDruidTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/hive_to_druid.html#HiveToDruidTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.hive_to_druid.HiveToDruidTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from Hive to Druid, [del]note that for now the data is loaded |
| into memory before being pushed to Druid, so this operator should |
| be used for smallish amount of data.[/del]</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against the Druid database. (templated)</li> |
| <li><strong>druid_datasource</strong> (<em>str</em>) – the datasource you want to ingest into in druid</li> |
| <li><strong>ts_dim</strong> (<em>str</em>) – the timestamp dimension</li> |
| <li><strong>metric_spec</strong> (<em>list</em>) – the metrics you want to define for your data</li> |
| <li><strong>hive_cli_conn_id</strong> (<em>str</em>) – the hive connection id</li> |
| <li><strong>druid_ingest_conn_id</strong> (<em>str</em>) – the druid ingest connection id</li> |
| <li><strong>metastore_conn_id</strong> (<em>str</em>) – the metastore connection id</li> |
| <li><strong>hadoop_dependency_coordinates</strong> (<em>list of str</em>) – list of coordinates to squeeze |
| into the ingest json</li> |
| <li><strong>intervals</strong> (<em>list</em>) – list of time intervals that defines segments, |
| this is passed as is to the json object. (templated)</li> |
| <li><strong>hive_tblproperties</strong> (<em>dict</em>) – additional properties for tblproperties in |
| hive for the staging table</li> |
| <li><strong>job_properties</strong> (<em>dict</em>) – additional properties for job</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.hive_to_druid.HiveToDruidTransfer.construct_ingest_query"> |
| <code class="descname">construct_ingest_query</code><span class="sig-paren">(</span><em>static_path</em>, <em>columns</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/hive_to_druid.html#HiveToDruidTransfer.construct_ingest_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.hive_to_druid.HiveToDruidTransfer.construct_ingest_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Builds an ingest query for an HDFS TSV load.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>static_path</strong> (<em>str</em>) – The path on hdfs where the data is</li> |
| <li><strong>columns</strong> (<em>list</em>) – List of all the columns that are available</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.hive_to_mysql.HiveToMySqlTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.hive_to_mysql.</code><code class="descname">HiveToMySqlTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/hive_to_mysql.html#HiveToMySqlTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.hive_to_mysql.HiveToMySqlTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from Hive to MySQL, note that for now the data is loaded |
| into memory before being pushed to MySQL, so this operator should |
| be used for smallish amount of data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against Hive server. (templated)</li> |
| <li><strong>mysql_table</strong> (<em>str</em>) – target MySQL table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>mysql_conn_id</strong> (<em>str</em>) – source mysql connection</li> |
| <li><strong>hiveserver2_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| <li><strong>mysql_preoperator</strong> (<em>str</em>) – sql statement to run against mysql prior to |
import, typically used to truncate or delete in place
| of the data coming in, allowing the task to be idempotent (running |
| the task twice won’t double load data). (templated)</li> |
| <li><strong>mysql_postoperator</strong> (<em>str</em>) – sql statement to run against mysql after the |
| import, typically used to move data from staging to |
| production and issue cleanup commands. (templated)</li> |
| <li><strong>bulk_load</strong> (<em>bool</em>) – flag to use bulk_load option. This loads mysql directly |
| from a tab-delimited text file using the LOAD DATA LOCAL INFILE command. |
| This option requires an extra connection parameter for the |
| destination MySQL connection: {‘local_infile’: true}.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.hive_to_samba_operator.Hive2SambaOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.hive_to_samba_operator.</code><code class="descname">Hive2SambaOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/hive_to_samba_operator.html#Hive2SambaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.hive_to_samba_operator.Hive2SambaOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes hql code in a specific Hive database and loads the |
| results of the query as a csv to a Samba location.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hql</strong> (<em>string</em>) – the hql to be exported. (templated)</li> |
| <li><strong>hiveserver2_conn_id</strong> (<em>string</em>) – reference to the hiveserver2 service</li> |
| <li><strong>samba_conn_id</strong> (<em>string</em>) – reference to the samba destination</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.hive_operator.HiveOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.hive_operator.</code><code class="descname">HiveOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/hive_operator.html#HiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.hive_operator.HiveOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes hql code or hive script in a specific Hive database.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hql</strong> (<em>string</em>) – the hql to be executed. Note that you may also use |
| a relative path from the dag file of a (template) hive |
| script. (templated)</li> |
| <li><strong>hive_cli_conn_id</strong> (<em>string</em>) – reference to the Hive database. (templated)</li> |
| <li><strong>hiveconfs</strong> (<em>dict</em>) – if defined, these key value pairs will be passed |
| to hive as <code class="docutils literal notranslate"><span class="pre">-hiveconf</span> <span class="pre">"key"="value"</span></code></li> |
| <li><strong>hiveconf_jinja_translate</strong> (<em>boolean</em>) – when True, hiveconf-type templating |
| ${var} gets translated into jinja-type templating {{ var }} and |
| ${hiveconf:var} gets translated into jinja-type templating {{ var }}. |
| Note that you may want to use this along with the |
| <code class="docutils literal notranslate"><span class="pre">DAG(user_defined_macros=myargs)</span></code> parameter. View the DAG |
| object documentation for more details.</li> |
| <li><strong>script_begin_tag</strong> (<em>str</em>) – If defined, the operator will get rid of the |
| part of the script before the first occurrence of <cite>script_begin_tag</cite></li> |
| <li><strong>mapred_queue</strong> (<em>string</em>) – queue used by the Hadoop CapacityScheduler. (templated)</li> |
| <li><strong>mapred_queue_priority</strong> (<em>string</em>) – priority within CapacityScheduler queue. |
| Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW</li> |
| <li><strong>mapred_job_name</strong> (<em>string</em>) – This name will appear in the jobtracker. |
| This can make monitoring easier.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.hive_stats_operator.HiveStatsCollectionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.hive_stats_operator.</code><code class="descname">HiveStatsCollectionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/hive_stats_operator.html#HiveStatsCollectionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.hive_stats_operator.HiveStatsCollectionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Gathers partition statistics using a dynamically generated Presto |
| query, inserts the stats into a MySql table with this format. Stats |
| overwrite themselves if you rerun the same date/partition.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CREATE</span> <span class="n">TABLE</span> <span class="n">hive_stats</span> <span class="p">(</span> |
| <span class="n">ds</span> <span class="n">VARCHAR</span><span class="p">(</span><span class="mi">16</span><span class="p">),</span> |
| <span class="n">table_name</span> <span class="n">VARCHAR</span><span class="p">(</span><span class="mi">500</span><span class="p">),</span> |
| <span class="n">metric</span> <span class="n">VARCHAR</span><span class="p">(</span><span class="mi">200</span><span class="p">),</span> |
| <span class="n">value</span> <span class="n">BIGINT</span> |
| <span class="p">);</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – the source table, in the format <code class="docutils literal notranslate"><span class="pre">database.table_name</span></code>. (templated)</li> |
| <li><strong>partition</strong> (<em>dict of {col:value}</em>) – the source partition. (templated)</li> |
| <li><strong>extra_exprs</strong> (<em>dict</em>) – dict of expression to run against the table where |
| keys are metric names and values are Presto compatible expressions</li> |
| <li><strong>col_blacklist</strong> (<em>list</em>) – list of columns to blacklist, consider |
| blacklisting blobs, large json columns, …</li> |
| <li><strong>assignment_func</strong> (<em>function</em>) – a function that receives a column name and |
a type, and returns a dict of metric names and Presto expressions.
| If None is returned, the global defaults are applied. If an |
| empty dictionary is returned, no stats are computed for that |
| column.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.check_operator.IntervalCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.check_operator.</code><code class="descname">IntervalCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/check_operator.html#IntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.check_operator.IntervalCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Checks that the values of metrics given as SQL expressions are within |
| a certain tolerance of the ones from days_back before.</p> |
| <p>Note that this is an abstract class and get_db_hook |
needs to be defined. Whereas a get_db_hook is a hook that gets a
| single record from an external source.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – the table name</li> |
| <li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check |
| against. Defaults to 7 days</li> |
| <li><strong>metrics_threshold</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.latest_only_operator.LatestOnlyOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.latest_only_operator.</code><code class="descname">LatestOnlyOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/latest_only_operator.html#LatestOnlyOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.latest_only_operator.LatestOnlyOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.SkipMixin</span></code></p> |
| <p>Allows a workflow to skip tasks that are not running during the most |
| recent schedule interval.</p> |
| <p>If the task is run outside of the latest schedule interval, all |
| directly downstream tasks will be skipped.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.mssql_operator.MsSqlOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.mssql_operator.</code><code class="descname">MsSqlOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/mssql_operator.html#MsSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.mssql_operator.MsSqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes sql code in a specific Microsoft SQL database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>mssql_conn_id</strong> (<em>string</em>) – reference to a specific mssql database</li> |
| <li><strong>sql</strong> (<em>string</em><em> or </em><em>string pointing to a template file with .sql |
| extension.</em><em> (</em><em>templated</em><em>)</em>) – the sql code to be executed</li> |
<li><strong>database</strong> (<em>string</em>) – name of the database, which overrides the one defined in the connection</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.mssql_to_hive.MsSqlToHiveTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.mssql_to_hive.</code><code class="descname">MsSqlToHiveTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/mssql_to_hive.html#MsSqlToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.mssql_to_hive.MsSqlToHiveTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from Microsoft SQL Server to Hive. The operator runs |
| your query against Microsoft SQL Server, stores the file locally |
| before loading it into a Hive table. If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or |
| <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, |
<code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
| Hive data types are inferred from the cursor’s metadata. |
| Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against the Microsoft SQL Server |
| database. (templated)</li> |
| <li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a specific |
| database. (templated)</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every execution</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns and |
| values. (templated)</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>mssql_conn_id</strong> (<em>str</em>) – source Microsoft SQL Server connection</li> |
| <li><strong>hive_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| <li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.mysql_operator.MySqlOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.mysql_operator.</code><code class="descname">MySqlOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/mysql_operator.html#MySqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.mysql_operator.MySqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes sql code in a specific MySQL database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>mysql_conn_id</strong> (<em>string</em>) – reference to a specific mysql database</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'</em>) – the sql code to be executed. (templated)</li> |
<li><strong>database</strong> (<em>string</em>) – name of the database, which overrides the one defined in the connection</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.mysql_to_hive.MySqlToHiveTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.mysql_to_hive.</code><code class="descname">MySqlToHiveTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/mysql_to_hive.html#MySqlToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.mysql_to_hive.MySqlToHiveTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from MySql to Hive. The operator runs your query against |
| MySQL, stores the file locally before loading it into a Hive table. |
| If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, |
<code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
| Hive data types are inferred from the cursor’s metadata. Note that the |
| table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against the MySQL database. (templated)</li> |
| <li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every |
| execution</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns |
| and values. (templated)</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>mysql_conn_id</strong> (<em>str</em>) – source mysql connection</li> |
| <li><strong>hive_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| <li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.pig_operator.PigOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.pig_operator.</code><code class="descname">PigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/pig_operator.html#PigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.pig_operator.PigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes pig script.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>pig</strong> (<em>string</em>) – the pig latin script to be executed. (templated)</li> |
| <li><strong>pig_cli_conn_id</strong> (<em>string</em>) – reference to the Hive database</li> |
| <li><strong>pigparams_jinja_translate</strong> (<em>boolean</em>) – when True, pig params-type templating |
| ${var} gets translated into jinja-type templating {{ var }}. Note that |
| you may want to use this along with the |
| <code class="docutils literal notranslate"><span class="pre">DAG(user_defined_macros=myargs)</span></code> parameter. View the DAG |
| object documentation for more details.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.postgres_operator.PostgresOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.postgres_operator.</code><code class="descname">PostgresOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/postgres_operator.html#PostgresOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.postgres_operator.PostgresOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes sql code in a specific Postgres database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>postgres_conn_id</strong> (<em>string</em>) – reference to a specific postgres database</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'</em>) – the sql code to be executed. (templated)</li> |
<li><strong>database</strong> (<em>string</em>) – name of the database, which overrides the one defined in the connection</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.presto_check_operator.PrestoCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.presto_check_operator.</code><code class="descname">PrestoCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/presto_check_operator.html#PrestoCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.presto_check_operator.PrestoCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p> |
| <p>Performs checks against Presto. The <code class="docutils literal notranslate"><span class="pre">PrestoCheckOperator</span></code> expects |
| a sql query that will return a single row. Each value on that |
| first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could,
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average.</p> |
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing from |
publishing dubious data, or on the side and receive email alerts
| without stopping the progress of the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>presto_conn_id</strong> (<em>string</em>) – reference to the Presto database</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.presto_check_operator.PrestoIntervalCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.presto_check_operator.</code><code class="descname">PrestoIntervalCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/presto_check_operator.html#PrestoIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.presto_check_operator.PrestoIntervalCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.IntervalCheckOperator" title="airflow.operators.check_operator.IntervalCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.IntervalCheckOperator</span></code></a></p> |
| <p>Checks that the values of metrics given as SQL expressions are within |
| a certain tolerance of the ones from days_back before.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – the table name</li> |
| <li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check |
| against. Defaults to 7 days</li> |
| <li><strong>metrics_threshold</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics</li> |
| <li><strong>presto_conn_id</strong> (<em>string</em>) – reference to the Presto database</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.presto_to_mysql.PrestoToMySqlTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.presto_to_mysql.</code><code class="descname">PrestoToMySqlTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/presto_to_mysql.html#PrestoToMySqlTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.presto_to_mysql.PrestoToMySqlTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from Presto to MySQL, note that for now the data is loaded |
| into memory before being pushed to MySQL, so this operator should |
| be used for smallish amount of data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against Presto. (templated)</li> |
| <li><strong>mysql_table</strong> (<em>str</em>) – target MySQL table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>mysql_conn_id</strong> (<em>str</em>) – source mysql connection</li> |
| <li><strong>presto_conn_id</strong> (<em>str</em>) – source presto connection</li> |
| <li><strong>mysql_preoperator</strong> (<em>str</em>) – sql statement to run against mysql prior to |
| import, typically used to truncate or delete in place |
| of the data coming in, allowing the task to be idempotent (running |
| the task twice won’t double load data). (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.presto_check_operator.PrestoValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.presto_check_operator.</code><code class="descname">PrestoValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/presto_check_operator.html#PrestoValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.presto_check_operator.PrestoValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a></p> |
| <p>Performs a simple value check using sql code.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>presto_conn_id</strong> (<em>string</em>) – reference to the Presto database</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.python_operator.PythonOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.python_operator.</code><code class="descname">PythonOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/python_operator.html#PythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.python_operator.PythonOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes a Python callable</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>python_callable</strong> (<em>python callable</em>) – A reference to an object that is callable</li> |
| <li><strong>op_kwargs</strong> (<em>dict</em>) – a dictionary of keyword arguments that will get unpacked |
| in your function</li> |
| <li><strong>op_args</strong> (<em>list</em>) – a list of positional arguments that will get unpacked when |
| calling your callable</li> |
| <li><strong>provide_context</strong> (<em>bool</em>) – if set to true, Airflow will pass a set of |
| keyword arguments that can be used in your function. This set of |
| kwargs correspond exactly to what you can use in your jinja |
| templates. For this to work, you need to define <cite>**kwargs</cite> in your |
| function header.</li> |
| <li><strong>templates_dict</strong> (<em>dict of str</em>) – a dictionary where the values are templates that |
| will get templated by the Airflow engine sometime between |
| <code class="docutils literal notranslate"><span class="pre">__init__</span></code> and <code class="docutils literal notranslate"><span class="pre">execute</span></code> takes place and are made available |
| in your callable’s context after the template has been applied. (templated)</li> |
| <li><strong>templates_exts</strong> (<em>list</em><em>(</em><em>str</em><em>)</em>) – a list of file extensions to resolve while |
| processing templated fields, for examples <code class="docutils literal notranslate"><span class="pre">['.sql',</span> <span class="pre">'.hql']</span></code></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.python_operator.PythonVirtualenvOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.python_operator.</code><code class="descname">PythonVirtualenvOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/python_operator.html#PythonVirtualenvOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.python_operator.PythonVirtualenvOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.python_operator.PythonOperator" title="airflow.operators.python_operator.PythonOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.python_operator.PythonOperator</span></code></a></p> |
| <p>Allows one to run a function in a virtualenv that is created and destroyed |
| automatically (with certain caveats).</p> |
| <p>The function must be defined using def, and not be |
| part of a class. All imports must happen inside the function |
| and no variables outside of the scope may be referenced. A global scope |
| variable named virtualenv_string_args will be available (populated by |
| string_args). In addition, one can pass stuff through op_args and op_kwargs, and one |
| can use a return value. |
| Note that if your virtualenv runs in a different Python major version than Airflow, |
| you cannot use return values, op_args, or op_kwargs. You can use string_args though. |
| :param python_callable: A python function with no references to outside variables, |
| defined with def, which will be run in a virtualenv</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>requirements</strong> (<em>list</em><em>(</em><em>str</em><em>)</em>) – A list of requirements as specified in a pip install command</li> |
| <li><strong>python_version</strong> (<em>str</em>) – The Python version to run the virtualenv with. Note that |
| both 2 and 2.7 are acceptable forms.</li> |
| <li><strong>use_dill</strong> (<em>bool</em>) – Whether to use dill to serialize |
| the args and result (pickle is default). This allow more complex types |
| but requires you to include dill in your requirements.</li> |
| <li><strong>system_site_packages</strong> (<em>bool</em>) – Whether to include |
| system_site_packages in your virtualenv. |
| See virtualenv documentation for more information.</li> |
| <li><strong>op_args</strong> – A list of positional arguments to pass to python_callable.</li> |
| <li><strong>op_kwargs</strong> (<em>dict</em>) – A dict of keyword arguments to pass to python_callable.</li> |
| <li><strong>string_args</strong> (<em>list</em><em>(</em><em>str</em><em>)</em>) – Strings that are present in the global var virtualenv_string_args, |
| available to python_callable at runtime as a list(str). Note that args are split |
| by newline.</li> |
| <li><strong>templates_dict</strong> (<em>dict of str</em>) – a dictionary where the values are templates that |
| will get templated by the Airflow engine sometime between |
| <code class="docutils literal notranslate"><span class="pre">__init__</span></code> and <code class="docutils literal notranslate"><span class="pre">execute</span></code> takes place and are made available |
| in your callable’s context after the template has been applied</li> |
| <li><strong>templates_exts</strong> (<em>list</em><em>(</em><em>str</em><em>)</em>) – a list of file extensions to resolve while |
| processing templated fields, for examples <code class="docutils literal notranslate"><span class="pre">['.sql',</span> <span class="pre">'.hql']</span></code></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_file_transform_operator.</code><code class="descname">S3FileTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies data from a source S3 location to a temporary location on the |
| local filesystem. Runs a transformation on this file as specified by |
| the transformation script and uploads the output to a destination S3 |
| location.</p> |
| <p>The locations of the source and the destination files in the local |
| filesystem are provided as the first and second arguments to the |
| transformation script. The transformation script is expected to read the |
| data from source, transform it and write the output to the local |
| destination file. The operator then takes over control and uploads the |
| local destination file to S3.</p> |
| <p>S3 Select is also available to filter the source contents. Users can |
| omit the transformation script if S3 Select expression is specified.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li> |
| <li><strong>source_aws_conn_id</strong> (<em>str</em>) – source s3 connection</li> |
| <li><strong>source_verify</strong> (<em>bool</em><em> or </em><em>str</em>) – <p>Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values:</p> |
| <ul> |
| <li><dl class="first docutils"> |
| <dt><code class="docutils literal notranslate"><span class="pre">False</span></code>: do not validate SSL certificates. SSL will still be used</dt> |
| <dd>(unless use_ssl is False), but SSL certificates will not be |
| verified.</dd> |
| </dl> |
| </li> |
| <li><dl class="first docutils"> |
| <dt><code class="docutils literal notranslate"><span class="pre">path/to/cert/bundle.pem</span></code>: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| <p>This is also applicable to <code class="docutils literal notranslate"><span class="pre">dest_verify</span></code>.</p> |
| </li> |
| <li><strong>dest_s3_key</strong> (<em>str</em>) – The key to be written from S3. (templated)</li> |
| <li><strong>dest_aws_conn_id</strong> (<em>str</em>) – destination s3 connection</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Replace dest S3 key if it already exists</li> |
| <li><strong>transform_script</strong> (<em>str</em>) – location of the executable transformation script</li> |
| <li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_to_hive_operator.</code><code class="descname">S3ToHiveTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from S3 to Hive. The operator downloads a file from S3, |
| stores the file locally before loading it into a Hive table. |
| If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated. |
| Hive data types are inferred from the cursor’s metadata.</p> |
| <p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li> |
| <li><strong>field_dict</strong> (<em>dict</em>) – A dictionary of the fields name in the file |
| as keys and their Hive types as values</li> |
| <li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every |
| execution</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns |
| and values. (templated)</li> |
| <li><strong>headers</strong> (<em>bool</em>) – whether the file contains column names on the first |
| line</li> |
| <li><strong>check_headers</strong> (<em>bool</em>) – whether the column names on the first line should be |
| checked against the keys of field_dict</li> |
| <li><strong>wildcard_match</strong> (<em>bool</em>) – whether the s3_key should be interpreted as a Unix |
| wildcard pattern</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – source s3 connection</li> |
| <li><strong>hive_cli_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| <li><strong>input_compressed</strong> (<em>bool</em>) – Boolean to determine if file decompression is |
| required to process headers</li> |
| <li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li> |
| <li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Param verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_to_redshift_operator.</code><code class="descname">S3ToRedshiftTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes a COPY command to load files from s3 to Redshift</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li> |
| <li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li> |
| <li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li> |
| <li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li> |
| <li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li> |
| <li><strong>copy_options</strong> (<em>list</em>) – reference to a list of COPY options</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Param verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.python_operator.ShortCircuitOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.python_operator.</code><code class="descname">ShortCircuitOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/python_operator.html#ShortCircuitOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.python_operator.ShortCircuitOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.python_operator.PythonOperator" title="airflow.operators.python_operator.PythonOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.python_operator.PythonOperator</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.SkipMixin</span></code></p> |
| <p>Allows a workflow to continue only if a condition is met. Otherwise, the |
| workflow “short-circuits” and downstream tasks are skipped.</p> |
| <p>The ShortCircuitOperator is derived from the PythonOperator. It evaluates a |
| condition and short-circuits the workflow if the condition is False. Any |
| downstream tasks are marked with a state of “skipped”. If the condition is |
| True, downstream tasks proceed as normal.</p> |
| <p>The condition is determined by the result of <cite>python_callable</cite>.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.http_operator.SimpleHttpOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.http_operator.</code><code class="descname">SimpleHttpOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/http_operator.html#SimpleHttpOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.http_operator.SimpleHttpOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Calls an endpoint on an HTTP system to execute an action</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>http_conn_id</strong> (<em>string</em>) – The connection to run the sensor against</li> |
| <li><strong>endpoint</strong> (<em>string</em>) – The relative part of the full url. (templated)</li> |
| <li><strong>method</strong> (<em>string</em>) – The HTTP method to use, default = “POST”</li> |
| <li><strong>data</strong> (<em>For POST/PUT</em><em>, </em><em>depends on the content-type parameter</em><em>, |
| </em><em>for GET a dictionary of key/value string pairs</em>) – The data to pass. POST-data in POST/PUT and params |
| in the URL for a GET request. (templated)</li> |
| <li><strong>headers</strong> (<em>a dictionary of string key/value pairs</em>) – The HTTP headers to be added to the GET request</li> |
| <li><strong>response_check</strong> (<em>A lambda</em><em> or </em><em>defined function.</em>) – A check against the ‘requests’ response object. |
| Returns True for ‘pass’ and False otherwise.</li> |
| <li><strong>extra_options</strong> (<em>A dictionary of options</em><em>, </em><em>where key is string and value |
| depends on the option that's being modified.</em>) – Extra options for the ‘requests’ library, see the |
| ‘requests’ documentation (options to modify timeout, ssl, etc.)</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – Push the response to Xcom (default: False)</li> |
| <li><strong>log_response</strong> (<em>bool</em>) – Log the response (default: False)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.slack_operator.SlackAPIOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.slack_operator.</code><code class="descname">SlackAPIOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/slack_operator.html#SlackAPIOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.slack_operator.SlackAPIOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Base Slack Operator |
| The SlackAPIPostOperator is derived from this operator. |
| In the future additional Slack API Operators will be derived from this class as well</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>slack_conn_id</strong> (<em>string</em>) – Slack connection ID whose password is the Slack API token</li> |
| <li><strong>token</strong> (<em>string</em>) – Slack API token (<a class="reference external" href="https://api.slack.com/web">https://api.slack.com/web</a>)</li> |
| <li><strong>method</strong> (<em>string</em>) – The Slack API Method to Call (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</li> |
| <li><strong>api_params</strong> (<em>dict</em>) – API Method call parameters (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.slack_operator.SlackAPIOperator.construct_api_call_params"> |
| <code class="descname">construct_api_call_params</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/slack_operator.html#SlackAPIOperator.construct_api_call_params"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.slack_operator.SlackAPIOperator.construct_api_call_params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Used by the execute function. Allows templating on the source fields |
| of the api_call_params dict before construction</p> |
| <p>Override in child classes. |
| Each SlackAPIOperator child class is responsible for |
| having a construct_api_call_params function |
| which sets self.api_call_params with a dict of |
| API call parameters (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.operators.slack_operator.SlackAPIOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/slack_operator.html#SlackAPIOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.slack_operator.SlackAPIOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>SlackAPIOperator calls will not fail even if the call is not successful. |
| It should not prevent a DAG from completing in success</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.slack_operator.SlackAPIPostOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.slack_operator.</code><code class="descname">SlackAPIPostOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/slack_operator.html#SlackAPIPostOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.slack_operator.SlackAPIPostOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.slack_operator.SlackAPIOperator" title="airflow.operators.slack_operator.SlackAPIOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.slack_operator.SlackAPIOperator</span></code></a></p> |
| <p>Posts messages to a slack channel</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>channel</strong> (<em>string</em>) – channel in which to post the message on Slack, by name (#general) or |
| ID (C12318391). (templated)</li> |
| <li><strong>username</strong> (<em>string</em>) – Username that airflow will be posting to Slack as. (templated)</li> |
| <li><strong>text</strong> (<em>string</em>) – message to send to slack. (templated)</li> |
| <li><strong>icon_url</strong> (<em>string</em>) – url to icon used for this message</li> |
| <li><strong>attachments</strong> (<em>array of hashes</em>) – extra formatting details. (templated) |
| - see <a class="reference external" href="https://api.slack.com/docs/attachments">https://api.slack.com/docs/attachments</a>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.slack_operator.SlackAPIPostOperator.construct_api_call_params"> |
| <code class="descname">construct_api_call_params</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/slack_operator.html#SlackAPIPostOperator.construct_api_call_params"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.slack_operator.SlackAPIPostOperator.construct_api_call_params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Used by the execute function. Allows templating on the source fields |
| of the api_call_params dict before construction</p> |
| <p>Override in child classes. |
| Each SlackAPIOperator child class is responsible for |
| having a construct_api_call_params function |
| which sets self.api_call_params with a dict of |
| API call parameters (<a class="reference external" href="https://api.slack.com/methods">https://api.slack.com/methods</a>)</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.sqlite_operator.SqliteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.sqlite_operator.</code><code class="descname">SqliteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/sqlite_operator.html#SqliteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.sqlite_operator.SqliteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes sql code in a specific Sqlite database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sqlite_conn_id</strong> (<em>string</em>) – reference to a specific sqlite database</li> |
| <li><strong>sql</strong> (<em>string</em><em> or </em><em>string pointing to a template file. File must have |
| a '.sql' extension.</em>) – the sql code to be executed. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.subdag_operator.SubDagOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.subdag_operator.</code><code class="descname">SubDagOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/subdag_operator.html#SubDagOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.subdag_operator.SubDagOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.dagrun_operator.TriggerDagRunOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.dagrun_operator.</code><code class="descname">TriggerDagRunOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/dagrun_operator.html#TriggerDagRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.dagrun_operator.TriggerDagRunOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Triggers a DAG run for a specified <code class="docutils literal notranslate"><span class="pre">dag_id</span></code></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>trigger_dag_id</strong> (<em>str</em>) – the dag_id to trigger (templated)</li> |
| <li><strong>python_callable</strong> (<em>python callable</em>) – a reference to a python function that will be |
| called while passing it the <code class="docutils literal notranslate"><span class="pre">context</span></code> object and a placeholder |
| object <code class="docutils literal notranslate"><span class="pre">obj</span></code> for your callable to fill and return if you want |
| a DagRun created. This <code class="docutils literal notranslate"><span class="pre">obj</span></code> object contains a <code class="docutils literal notranslate"><span class="pre">run_id</span></code> and |
| <code class="docutils literal notranslate"><span class="pre">payload</span></code> attribute that you can modify in your function. |
| The <code class="docutils literal notranslate"><span class="pre">run_id</span></code> should be a unique identifier for that DAG run, and |
| the payload has to be a picklable object that will be made available |
| to your tasks while executing that DAG run. Your function header |
| should look like <code class="docutils literal notranslate"><span class="pre">def</span> <span class="pre">foo(context,</span> <span class="pre">dag_run_obj):</span></code></li> |
| <li><strong>execution_date</strong> (<em>str</em><em> or </em><em>datetime.datetime</em>) – Execution date for the dag (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.check_operator.ValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.check_operator.</code><code class="descname">ValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/check_operator.html#ValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.check_operator.ValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Performs a simple value check using sql code.</p> |
| <p>Note that this is an abstract class and get_db_hook |
| needs to be defined; get_db_hook is a hook that gets a |
| single record from an external source.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>sql</strong> (<em>string</em>) – the sql to be executed. (templated)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.redshift_to_s3_operator.</code><code class="descname">RedshiftToS3Transfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes an UNLOAD command to s3 as a CSV with headers</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li> |
| <li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li> |
| <li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li> |
| <li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li> |
| <li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li> |
| <li><strong>unload_options</strong> (<em>list</em>) – reference to a list of UNLOAD options</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Param verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sensors"> |
| <h4>Sensors<a class="headerlink" href="#sensors" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.sensors.external_task_sensor.ExternalTaskSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.external_task_sensor.</code><code class="descname">ExternalTaskSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/external_task_sensor.html#ExternalTaskSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.external_task_sensor.ExternalTaskSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a task to complete in a different DAG</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>external_dag_id</strong> (<em>string</em>) – The dag_id that contains the task you want to |
| wait for</li> |
| <li><strong>external_task_id</strong> (<em>string</em>) – The task_id that contains the task you want to |
| wait for</li> |
| <li><strong>allowed_states</strong> (<em>list</em>) – list of allowed states, default is <code class="docutils literal notranslate"><span class="pre">['success']</span></code></li> |
| <li><strong>execution_delta</strong> (<em>datetime.timedelta</em>) – time difference with the previous execution to |
| look at, the default is the same execution_date as the current task. |
| For yesterday, use [positive!] datetime.timedelta(days=1). Either |
| execution_delta or execution_date_fn can be passed to |
| ExternalTaskSensor, but not both.</li> |
| <li><strong>execution_date_fn</strong> (<em>callable</em>) – function that receives the current execution date |
| and returns the desired execution dates to query. Either execution_delta |
| or execution_date_fn can be passed to ExternalTaskSensor, but not both.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.external_task_sensor.ExternalTaskSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/external_task_sensor.html#ExternalTaskSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.external_task_sensor.ExternalTaskSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.hdfs_sensor.HdfsSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.hdfs_sensor.</code><code class="descname">HdfsSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/hdfs_sensor.html#HdfsSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.hdfs_sensor.HdfsSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a file or folder to land in HDFS</p> |
| <dl class="staticmethod"> |
| <dt id="airflow.sensors.hdfs_sensor.HdfsSensor.filter_for_filesize"> |
| <em class="property">static </em><code class="descname">filter_for_filesize</code><span class="sig-paren">(</span><em>result</em>, <em>size=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/hdfs_sensor.html#HdfsSensor.filter_for_filesize"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.hdfs_sensor.HdfsSensor.filter_for_filesize" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Will test the filepath result and test if its size is at least self.filesize</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>result</strong> – a list of dicts returned by Snakebite ls</li> |
| <li><strong>size</strong> – the file size in MB a file should be at least to trigger True</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">(bool) depending on the matching criteria</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.sensors.hdfs_sensor.HdfsSensor.filter_for_ignored_ext"> |
| <em class="property">static </em><code class="descname">filter_for_ignored_ext</code><span class="sig-paren">(</span><em>result</em>, <em>ignored_ext</em>, <em>ignore_copying</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/hdfs_sensor.html#HdfsSensor.filter_for_ignored_ext"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.hdfs_sensor.HdfsSensor.filter_for_ignored_ext" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Will filter if instructed to do so the result to remove matching criteria</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>result</strong> – (list) of dicts returned by Snakebite ls</li> |
| <li><strong>ignored_ext</strong> – (list) of ignored extensions</li> |
| <li><strong>ignore_copying</strong> – (bool) shall we ignore?</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">(list) of dicts which were not removed</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.sensors.hdfs_sensor.HdfsSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/hdfs_sensor.html#HdfsSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.hdfs_sensor.HdfsSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.hive_partition_sensor.HivePartitionSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.hive_partition_sensor.</code><code class="descname">HivePartitionSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/hive_partition_sensor.html#HivePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.hive_partition_sensor.HivePartitionSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a partition to show up in Hive.</p> |
| <p>Note: Because <code class="docutils literal notranslate"><span class="pre">partition</span></code> supports general logical operators, it |
| can be inefficient. Consider using NamedHivePartitionSensor instead if |
| you don’t need the full flexibility of HivePartitionSensor.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>string</em>) – The name of the table to wait for, supports the dot |
| notation (my_database.my_table)</li> |
| <li><strong>partition</strong> (<em>string</em>) – The partition clause to wait for. This is passed as |
| is to the metastore Thrift client <code class="docutils literal notranslate"><span class="pre">get_partitions_by_filter</span></code> method, |
| and apparently supports SQL like notation as in <code class="docutils literal notranslate"><span class="pre">ds='2015-01-01'</span> |
| <span class="pre">AND</span> <span class="pre">type='value'</span></code> and comparison operators as in <code class="docutils literal notranslate"><span class="pre">"ds>=2015-01-01"</span></code></li> |
| <li><strong>metastore_conn_id</strong> (<em>str</em>) – reference to the metastore thrift service |
| connection id</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.hive_partition_sensor.HivePartitionSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/hive_partition_sensor.html#HivePartitionSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.hive_partition_sensor.HivePartitionSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.http_sensor.HttpSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.http_sensor.</code><code class="descname">HttpSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/http_sensor.html#HttpSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.http_sensor.HttpSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <dl class="docutils"> |
| <dt>Executes an HTTP GET statement and returns False on failure:</dt> |
| <dd>404 not found or response_check function returned False</dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>http_conn_id</strong> (<em>string</em>) – The connection to run the sensor against</li> |
| <li><strong>method</strong> (<em>string</em>) – The HTTP request method to use</li> |
| <li><strong>endpoint</strong> (<em>string</em>) – The relative part of the full url</li> |
| <li><strong>request_params</strong> (<em>a dictionary of string key/value pairs</em>) – The parameters to be added to the GET url</li> |
| <li><strong>headers</strong> (<em>a dictionary of string key/value pairs</em>) – The HTTP headers to be added to the GET request</li> |
| <li><strong>response_check</strong> (<em>A lambda</em><em> or </em><em>defined function.</em>) – A check against the ‘requests’ response object. |
| Returns True for ‘pass’ and False otherwise.</li> |
| <li><strong>extra_options</strong> (<em>A dictionary of options</em><em>, </em><em>where key is string and value |
| depends on the option that's being modified.</em>) – Extra options for the ‘requests’ library, see the |
| ‘requests’ documentation (options to modify timeout, ssl, etc.)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.http_sensor.HttpSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/http_sensor.html#HttpSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.http_sensor.HttpSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.metastore_partition_sensor.MetastorePartitionSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.metastore_partition_sensor.</code><code class="descname">MetastorePartitionSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/metastore_partition_sensor.html#MetastorePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.metastore_partition_sensor.MetastorePartitionSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.sql_sensor.SqlSensor" title="airflow.sensors.sql_sensor.SqlSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.sql_sensor.SqlSensor</span></code></a></p> |
| <p>An alternative to the HivePartitionSensor that talks directly to the |
| MySQL db. This was created as a result of observing sub optimal |
| queries generated by the Metastore thrift service when hitting |
| subpartitioned tables. The Thrift service’s queries were written in a |
| way that wouldn’t leverage the indexes.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>schema</strong> (<em>str</em>) – the schema</li> |
| <li><strong>table</strong> (<em>str</em>) – the table</li> |
| <li><strong>partition_name</strong> (<em>str</em>) – the partition name, as defined in the PARTITIONS |
| table of the Metastore. Order of the fields does matter. |
| Examples: <code class="docutils literal notranslate"><span class="pre">ds=2016-01-01</span></code> or |
| <code class="docutils literal notranslate"><span class="pre">ds=2016-01-01/sub=foo</span></code> for a sub partitioned table</li> |
| <li><strong>mysql_conn_id</strong> (<em>str</em>) – a reference to the MySQL conn_id for the metastore</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.metastore_partition_sensor.MetastorePartitionSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/metastore_partition_sensor.html#MetastorePartitionSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.metastore_partition_sensor.MetastorePartitionSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.named_hive_partition_sensor.NamedHivePartitionSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.named_hive_partition_sensor.</code><code class="descname">NamedHivePartitionSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/named_hive_partition_sensor.html#NamedHivePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.named_hive_partition_sensor.NamedHivePartitionSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a set of partitions to show up in Hive.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>partition_names</strong> (<em>list of strings</em>) – List of fully qualified names of the |
| partitions to wait for. A fully qualified name is of the |
| form <code class="docutils literal notranslate"><span class="pre">schema.table/pk1=pv1/pk2=pv2</span></code>, for example, |
| default.users/ds=2016-01-01. This is passed as is to the metastore |
| Thrift client <code class="docutils literal notranslate"><span class="pre">get_partitions_by_name</span></code> method. Note that |
| you cannot use logical or comparison operators as in |
| HivePartitionSensor.</li> |
| <li><strong>metastore_conn_id</strong> (<em>str</em>) – reference to the metastore thrift service |
| connection id</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.named_hive_partition_sensor.NamedHivePartitionSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/named_hive_partition_sensor.html#NamedHivePartitionSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.named_hive_partition_sensor.NamedHivePartitionSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.s3_key_sensor.S3KeySensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.s3_key_sensor.</code><code class="descname">S3KeySensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/s3_key_sensor.html#S3KeySensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.s3_key_sensor.S3KeySensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a key (a file-like instance on S3) to be present in a S3 bucket. |
| S3 being a key/value store, it does not support folders. The path is just a key |
| to a resource.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_key</strong> (<em>str</em>) – The key being waited on. Supports full s3:// style url |
| or relative path from root level.</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the S3 bucket</li> |
| <li><strong>wildcard_match</strong> (<em>bool</em>) – whether the bucket_key should be interpreted as a |
| Unix wildcard pattern</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – a reference to the s3 connection</li> |
| <li><strong>verify</strong> (<em>bool</em><em> or </em><em>str</em>) – <p>Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div>(unless use_ssl is False), but SSL certificates will not be |
| verified.</div></blockquote> |
| <ul> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt>
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.s3_key_sensor.S3KeySensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/s3_key_sensor.html#S3KeySensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.s3_key_sensor.S3KeySensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.s3_prefix_sensor.S3PrefixSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.s3_prefix_sensor.</code><code class="descname">S3PrefixSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/s3_prefix_sensor.html#S3PrefixSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.s3_prefix_sensor.S3PrefixSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a prefix to exist. A prefix is the first part of a key,
| thus enabling checking of constructs similar to glob airfl* or
| SQL LIKE ‘airfl%’. It is possible to specify a delimiter to
| indicate the hierarchy of keys, meaning that the match will stop at that
| delimiter. Current code accepts sane delimiters, i.e. characters that
| are NOT special characters in the Python regex engine.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the S3 bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – The prefix being waited on. Relative path from bucket root level.</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – The delimiter intended to show hierarchy. |
| Defaults to ‘/’.</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – a reference to the s3 connection</li> |
| <li><strong>verify</strong> (<em>bool</em><em> or </em><em>str</em>) – <p>Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div>(unless use_ssl is False), but SSL certificates will not be |
| verified.</div></blockquote> |
| <ul> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt>
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.s3_prefix_sensor.S3PrefixSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/s3_prefix_sensor.html#S3PrefixSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.s3_prefix_sensor.S3PrefixSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.sql_sensor.SqlSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.sql_sensor.</code><code class="descname">SqlSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/sql_sensor.html#SqlSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.sql_sensor.SqlSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Runs a sql statement until a criterion is met. It will keep trying while the
| sql returns no row, or if the first cell is in (0, ‘0’, ‘’).</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>conn_id</strong> (<em>string</em>) – The connection to run the sensor against</li> |
| <li><strong>sql</strong> – The sql to run. To pass, it needs to return at least one cell |
| that contains a non-zero / non-empty string value.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.sql_sensor.SqlSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/sql_sensor.html#SqlSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.sql_sensor.SqlSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.time_sensor.TimeSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.time_sensor.</code><code class="descname">TimeSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/time_sensor.html#TimeSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.time_sensor.TimeSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits until the specified time of the day.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>target_time</strong> (<em>datetime.time</em>) – time after which the job succeeds</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.time_sensor.TimeSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/time_sensor.html#TimeSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.time_sensor.TimeSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.time_delta_sensor.TimeDeltaSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.time_delta_sensor.</code><code class="descname">TimeDeltaSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/time_delta_sensor.html#TimeDeltaSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.time_delta_sensor.TimeDeltaSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a timedelta after the task’s execution_date + schedule_interval. |
| In Airflow, the daily task stamped with <code class="docutils literal notranslate"><span class="pre">execution_date</span></code> |
| 2016-01-01 can only start running on 2016-01-02. The timedelta here |
| represents the time after the execution period has closed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>delta</strong> (<em>datetime.timedelta</em>) – time length to wait after execution_date before succeeding</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.sensors.time_delta_sensor.TimeDeltaSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/time_delta_sensor.html#TimeDeltaSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.time_delta_sensor.TimeDeltaSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.sensors.web_hdfs_sensor.WebHdfsSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.sensors.web_hdfs_sensor.</code><code class="descname">WebHdfsSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/web_hdfs_sensor.html#WebHdfsSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.web_hdfs_sensor.WebHdfsSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a file or folder to land in HDFS</p> |
| <dl class="method"> |
| <dt id="airflow.sensors.web_hdfs_sensor.WebHdfsSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/sensors/web_hdfs_sensor.html#WebHdfsSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.sensors.web_hdfs_sensor.WebHdfsSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="community-contributed-operators"> |
| <h3>Community-contributed Operators<a class="headerlink" href="#community-contributed-operators" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="id2"> |
| <h4>Operators<a class="headerlink" href="#id2" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.adls_list_operator.</code><code class="descname">AzureDataLakeStorageListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/adls_list_operator.html#AzureDataLakeStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all files from the specified path</p> |
| <dl class="docutils"> |
| <dt>This operator returns a python list with the names of files which can be used by</dt> |
| <dd><cite>xcom</cite> in the downstream tasks.</dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>path</strong> (<em>str</em>) – The Azure Data Lake path to find the objects. Supports glob |
| strings (templated)</li> |
| <li><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Azure Data Lake Storage.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would list all the Parquet files from <code class="docutils literal notranslate"><span class="pre">folder/output/</span></code> |
| folder in the specified ADLS account</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">adls_files</span> <span class="o">=</span> <span class="n">AzureDataLakeStorageListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'adls_files'</span><span class="p">,</span> |
| <span class="n">path</span><span class="o">=</span><span class="s1">'folder/output/*.parquet'</span><span class="p">,</span> |
| <span class="n">azure_data_lake_conn_id</span><span class="o">=</span><span class="s1">'azure_data_lake_default'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.adls_to_gcs.AdlsToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.adls_to_gcs.</code><code class="descname">AdlsToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/adls_to_gcs.html#AdlsToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.adls_to_gcs.AdlsToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator" title="airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator</span></code></a></p> |
| <p>Synchronizes an Azure Data Lake Storage path with a GCS bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>src_adls</strong> (<em>str</em>) – The Azure Data Lake path to find the objects (templated)</li> |
| <li><strong>dest_gcs</strong> (<em>str</em>) – The Google Cloud Storage bucket and prefix to |
| store the objects. (templated)</li> |
| <li><strong>replace</strong> (<em>bool</em>) – If true, replaces same-named files in GCS</li> |
| <li><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Azure Data Lake Storage.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Google Cloud Storage.</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Examples</strong>:</dt> |
| <dd><p class="first">The following Operator would copy a single file named |
| <code class="docutils literal notranslate"><span class="pre">hello/world.avro</span></code> from ADLS to the GCS bucket <code class="docutils literal notranslate"><span class="pre">mybucket</span></code>. Its full |
| resulting gcs path will be <code class="docutils literal notranslate"><span class="pre">gs://mybucket/hello/world.avro</span></code></p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">AdlsToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_single_file'</span><span class="p">,</span> |
| <span class="n">src_adls</span><span class="o">=</span><span class="s1">'hello/world.avro'</span><span class="p">,</span> |
| <span class="n">dest_gcs</span><span class="o">=</span><span class="s1">'gs://mybucket'</span><span class="p">,</span> |
| <span class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
| <span class="n">azure_data_lake_conn_id</span><span class="o">=</span><span class="s1">'azure_data_lake_default'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'google_cloud_default'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would copy all parquet files from ADLS |
| to the GCS bucket <code class="docutils literal notranslate"><span class="pre">mybucket</span></code>.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span> copy_all_files = AdlsToGoogleCloudStorageOperator( |
| task_id='copy_all_files', |
| src_adls='*.parquet', |
| dest_gcs='gs://mybucket', |
| replace=False, |
| azure_data_lake_conn_id='azure_data_lake_default', |
| google_cloud_storage_conn_id='google_cloud_default' |
| ) |
| |
| The following Operator would copy all parquet files from ADLS |
| path ``/hello/world`` to the GCS bucket ``mybucket``. ::
| copy_world_files = AdlsToGoogleCloudStorageOperator( |
| task_id='copy_world_files', |
| src_adls='hello/world/*.parquet', |
| dest_gcs='gs://mybucket', |
| replace=False, |
| azure_data_lake_conn_id='azure_data_lake_default', |
| google_cloud_storage_conn_id='google_cloud_default' |
| ) |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.aws_athena_operator.AWSAthenaOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.aws_athena_operator.</code><code class="descname">AWSAthenaOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/aws_athena_operator.html#AWSAthenaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.aws_athena_operator.AWSAthenaOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>An operator that submits a Presto query to Athena.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query</strong> (<em>str</em>) – Presto to be run on athena. (templated)</li> |
| <li><strong>database</strong> (<em>str</em>) – Database to select. (templated)</li> |
| <li><strong>output_location</strong> (<em>str</em>) – s3 path to write the query results into. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| <li><strong>sleep_time</strong> (<em>int</em>) – Time to wait between two consecutive calls to check query status on Athena</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.aws_athena_operator.AWSAthenaOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/aws_athena_operator.html#AWSAthenaOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.aws_athena_operator.AWSAthenaOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run Presto Query on Athena</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.aws_athena_operator.AWSAthenaOperator.on_kill"> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/aws_athena_operator.html#AWSAthenaOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.aws_athena_operator.AWSAthenaOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Cancel the submitted athena query</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.awsbatch_operator.</code><code class="descname">AWSBatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a job on AWS Batch Service</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – the name for the job that will run on AWS Batch</li> |
| <li><strong>job_definition</strong> (<em>str</em>) – the job definition name on AWS Batch</li> |
| <li><strong>job_queue</strong> (<em>str</em>) – the queue name on AWS Batch</li> |
| <li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive on |
| containerOverrides (templated): |
| <a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job">http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job</a></li> |
| <li><strong>max_retries</strong> (<em>int</em>) – exponential backoff retries while waiter is not |
| merged, 4200 = 48 hours</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None, |
| credential boto3 strategy will be used |
| (<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li> |
| <li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook. |
| Override the region_name in connection (if provided)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.azure_cosmos_operator.AzureCosmosInsertDocumentOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.azure_cosmos_operator.</code><code class="descname">AzureCosmosInsertDocumentOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/azure_cosmos_operator.html#AzureCosmosInsertDocumentOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.azure_cosmos_operator.AzureCosmosInsertDocumentOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Inserts a new document into the specified Cosmos database and collection.
| It will create both the database and collection if they do not already exist.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>database_name</strong> (<em>str</em>) – The name of the database. (templated)</li> |
| <li><strong>collection_name</strong> (<em>str</em>) – The name of the collection. (templated)</li> |
| <li><strong>document</strong> (<em>dict</em>) – The document to insert</li> |
| <li><strong>azure_cosmos_conn_id</strong> (<em>str</em>) – reference to a CosmosDB connection.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p> |
| <p>Performs checks against BigQuery. The <code class="docutils literal notranslate"><span class="pre">BigQueryCheckOperator</span></code> expects |
| a sql query that will return a single row. Each value on that |
| first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could,
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less
| than 3 standard deviations from the 7-day average.</p>
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing it from
| publishing dubious data, or on the side and receive email alerts
| without stopping the progress of the DAG.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to the BigQuery database</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a></p> |
| <p>Performs a simple value check using sql code.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryIntervalCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.IntervalCheckOperator" title="airflow.operators.check_operator.IntervalCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.IntervalCheckOperator</span></code></a></p> |
| <p>Checks that the values of metrics given as SQL expressions are within |
| a certain tolerance of the ones from days_back before.</p> |
| <p>This method constructs a query like so</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SELECT</span> <span class="p">{</span><span class="n">metrics_threshold_dict_key</span><span class="p">}</span> <span class="n">FROM</span> <span class="p">{</span><span class="n">table</span><span class="p">}</span> |
<span class="n">WHERE</span> <span class="p">{</span><span class="n">date_filter_column</span><span class="p">}</span><span class="o">=&lt;</span><span class="n">date</span><span class="o">&gt;</span>
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – the table name</li> |
| <li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check |
| against. Defaults to 7 days</li> |
| <li><strong>metrics_threshold</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics, for |
| example ‘COUNT(*)’: 1.5 would require a 50 percent or less difference |
| between the current day, and the prior days_back.</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_get_data.</code><code class="descname">BigQueryGetDataOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Fetches the data from a BigQuery table (alternatively fetch data for selected columns) |
| and returns data in a python list. The number of elements in the returned list will |
| be equal to the number of rows fetched. Each element in the list will again be a list |
where each element represents the column values for that row.</p>
| <p><strong>Example Result</strong>: <code class="docutils literal notranslate"><span class="pre">[['Tony',</span> <span class="pre">'10'],</span> <span class="pre">['Mike',</span> <span class="pre">'20'],</span> <span class="pre">['Steve',</span> <span class="pre">'15']]</span></code></p> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
<p class="last">If you pass fields to <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> which are in a different order than the
| order of columns already in |
| BQ table, the data will still be in the order of BQ table. |
| For example if the BQ table has 3 columns as |
| <code class="docutils literal notranslate"><span class="pre">[A,B,C]</span></code> and you pass ‘B,A’ in the <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> |
| the data would still be of the form <code class="docutils literal notranslate"><span class="pre">'A,B'</span></code>.</p> |
| </div> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">get_data</span> <span class="o">=</span> <span class="n">BigQueryGetDataOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'get_data_from_bq'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'test_dataset'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Transaction_partitions'</span><span class="p">,</span> |
| <span class="n">max_results</span><span class="o">=</span><span class="s1">'100'</span><span class="p">,</span> |
| <span class="n">selected_fields</span><span class="o">=</span><span class="s1">'DATE'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The dataset ID of the requested table. (templated)</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The table ID of the requested table. (templated)</li> |
| <li><strong>max_results</strong> (<em>string</em>) – The maximum number of records (rows) to be fetched |
| from the table. (templated)</li> |
| <li><strong>selected_fields</strong> (<em>string</em>) – List of fields to return (comma-separated). If |
| unspecified, all fields are returned.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyTableOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new, empty table in the specified BigQuery dataset, |
| optionally with schema.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it. |
| You can also create a table without schema.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The project to create the table into. (templated)</li> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The dataset to create the table into. (templated)</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The Name of the table to be created. (templated)</li> |
| <li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}]</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><strong>gcs_schema_object</strong> (<em>string</em>) – Full path to the JSON file containing |
| schema (templated). For |
| example: <code class="docutils literal notranslate"><span class="pre">gs://test-bucket/dir1/dir2/employee_schema.json</span></code></li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – <p>configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning</a></p> |
| </div> |
| </li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – <p>a dictionary containing labels for the table, passed to BigQuery</p> |
| <p><strong>Example (with schema JSON in GCS)</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'BigQueryCreateEmptyTableOperator_task'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'ODS'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Employees'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'internal-gcp-project'</span><span class="p">,</span> |
| <span class="n">gcs_schema_object</span><span class="o">=</span><span class="s1">'gs://schema-bucket/employee_schema.json'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><strong>Corresponding Schema file</strong> (<code class="docutils literal notranslate"><span class="pre">employee_schema.json</span></code>):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span> |
| <span class="p">{</span> |
| <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">,</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span> |
| <span class="p">},</span> |
| <span class="p">{</span> |
| <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">,</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span> |
| <span class="p">}</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| <p><strong>Example (with schema in the DAG)</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'BigQueryCreateEmptyTableOperator_task'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'ODS'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Employees'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'internal-gcp-project'</span><span class="p">,</span> |
| <span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}],</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateExternalTableOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new external table in the dataset with the data in Google Cloud |
| Storage.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to point the external table to. (templated)</li> |
| <li><strong>source_objects</strong> (<em>list</em>) – List of Google cloud storage URIs to point |
| table to. (templated) |
| If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li> |
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into (templated). If &lt;project&gt; is not included,
project will be the project defined in the connection json.</li>
| <li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}]</span> |
| </pre></div> |
| </div> |
| <p>Should not be set when source_format is ‘DATASTORE_BACKUP’.</p> |
| </li> |
| <li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that |
| contains the schema for the table. (templated)</li> |
| <li><strong>source_format</strong> (<em>string</em>) – File format of the data.</li> |
| <li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source. |
| Possible values include GZIP and NONE. |
| The default value is NONE. |
| This setting is ignored for Google Cloud Bigtable, |
| Google Cloud Datastore backups and Avro formats.</li> |
| <li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use for the CSV.</li> |
| <li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can |
| ignore when running the job.</li> |
| <li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li> |
| <li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li> |
| <li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns. |
| The missing values are treated as nulls. If false, records with missing trailing |
| columns are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result. Only applicable to CSV, ignored |
| for other formats.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the table, passed to BigQuery</p>
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryDeleteDatasetOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
<p>This operator deletes an existing dataset from your project in BigQuery.
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete</a></p>
<ul class="simple">
<li><strong>project_id</strong> (<em>string</em>) – The project id of the dataset.</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset to be deleted.</li>
</ul>
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">delete_temp_data</span> <span class="o">=</span> <span class="n">BigQueryDeleteDatasetOperator</span><span class="p">(</span><span class="n">dataset_id</span> <span class="o">=</span> <span class="s1">'temp-dataset'</span><span class="p">,</span> |
| <span class="n">project_id</span> <span class="o">=</span> <span class="s1">'temp-project'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'_my_gcp_conn_'</span><span class="p">,</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'Deletetemp'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyDatasetOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyDatasetOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyDatasetOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
<p>This operator is used to create a new dataset for your project in BigQuery.
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>project_id</strong> (<em>str</em>) – The name of the project where we want to create the dataset.
Not needed if projectId is provided in dataset_reference.</li>
<li><strong>dataset_id</strong> (<em>str</em>) – The id of the dataset. Not needed if datasetId is provided
in dataset_reference.</li>
| <li><strong>dataset_reference</strong> – Dataset reference that could be provided with request body. |
| More info: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</a></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes BigQuery SQL queries in a specific BigQuery database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'.</em>) – (Deprecated. Use <cite>sql</cite> parameter instead) the sql code to be |
| executed (templated)</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'.</em>) – the sql code to be executed (templated)</li> |
| <li><strong>destination_dataset_table</strong> (<em>string</em>) – A dotted |
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that, if set, will store the results
| of the query. (templated)</li> |
| <li><strong>write_disposition</strong> (<em>string</em>) – Specifies the action that occurs if the destination table |
| already exists. (default: ‘WRITE_EMPTY’)</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – Specifies whether the job is allowed to create new tables. |
| (default: ‘CREATE_IF_NEEDED’)</li> |
| <li><strong>allow_large_results</strong> (<em>boolean</em>) – Whether to allow large results.</li> |
| <li><strong>flatten_results</strong> (<em>boolean</em>) – If true and query uses legacy SQL dialect, flattens |
| all nested and repeated fields in the query results. <code class="docutils literal notranslate"><span class="pre">allow_large_results</span></code> |
| must be <code class="docutils literal notranslate"><span class="pre">true</span></code> if this is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>. For standard SQL queries, this |
| flag is ignored and results are never flattened.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>udf_config</strong> (<em>list</em>) – The User Defined Function configuration for the query. |
| See <a class="reference external" href="https://cloud.google.com/bigquery/user-defined-functions">https://cloud.google.com/bigquery/user-defined-functions</a> for details.</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) or standard SQL (false).</li> |
| <li><strong>maximum_billing_tier</strong> (<em>integer</em>) – Positive integer that serves as a multiplier |
| of the basic price. |
| Defaults to None, in which case it uses the value set in the project.</li> |
| <li><strong>maximum_bytes_billed</strong> (<em>float</em>) – Limits the bytes billed for this job. |
| Queries that will have bytes billed beyond this limit will fail |
| (without incurring a charge). If unspecified, this will be |
| set to your project default.</li> |
| <li><strong>api_resource_configs</strong> (<em>dict</em>) – a dictionary that contain params |
| ‘configuration’ applied for Google BigQuery Jobs API: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs</a> |
| for example, {‘query’: {‘useQueryCache’: False}}. You could use it |
| if you need to provide some params that are not supported by BigQueryOperator |
| like args.</li> |
| <li><strong>schema_update_options</strong> (<em>tuple</em>) – Allows the schema of the destination |
| table to be updated as a side effect of the load job.</li> |
| <li><strong>query_params</strong> (<em>dict</em>) – a dictionary containing query parameter types and |
| values, passed to BigQuery.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| <li><strong>priority</strong> (<em>string</em>) – Specifies a priority for the query. |
| Possible values include INTERACTIVE and BATCH. |
| The default value is INTERACTIVE.</li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications.</li> |
| <li><strong>cluster_fields</strong> (<em>list of str</em>) – Request that the result of this query be stored sorted |
| by one or more columns. This is only available in conjunction with |
| time_partitioning. The order of columns given determines the sort order.</li> |
| <li><strong>location</strong> (<em>str</em>) – The geographic location of the job. Required except for |
| US and EU. See details at |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/locations#specifying_your_location">https://cloud.google.com/bigquery/docs/locations#specifying_your_location</a></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_table_delete_operator.</code><code class="descname">BigQueryTableDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Deletes BigQuery tables</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>deletion_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that indicates which table
will be deleted. (templated)</li>
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>ignore_if_missing</strong> (<em>boolean</em>) – if True, then return success even if the |
| requested table does not exist.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_bigquery.</code><code class="descname">BigQueryToBigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies data from one BigQuery table to another.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about these parameters: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>source_project_dataset_tables</strong> (<em>list|string</em>) – One or more
dotted (project:|project.)&lt;dataset&gt;.&lt;table&gt; BigQuery tables to use as the
source data. If &lt;project&gt; is not included, project will be the
project defined in the connection json. Use a list if there are multiple
source tables. (templated)</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The destination BigQuery
table. Format is: (project:|project.)&lt;dataset&gt;.&lt;table&gt; (templated)</li>
| <li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_gcs.</code><code class="descname">BigQueryToCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Transfers a BigQuery table to a Google Cloud Storage bucket.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about these parameters: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">https://cloud.google.com/bigquery/docs/reference/v2/jobs</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>source_project_dataset_table</strong> (<em>string</em>) – The dotted
<code class="docutils literal notranslate"><span class="pre">(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;</span></code> BigQuery table to use as the
source data. If &lt;project&gt; is not included, project will be the project
defined in the connection json. (templated)</li>
<li><strong>destination_cloud_storage_uris</strong> (<em>list</em>) – The destination Google Cloud
Storage URI (e.g. gs://some-bucket/some-file.txt). (templated) Follows
convention defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple">https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple</a></li>
| <li><strong>compression</strong> (<em>string</em>) – Type of compression to use.</li> |
| <li><strong>export_format</strong> (<em>string</em>) – File format to export.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when extracting to a CSV.</li> |
| <li><strong>print_header</strong> (<em>boolean</em>) – Whether to print a header for a CSV file extract.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.cassandra_to_gcs.</code><code class="descname">CassandraToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/cassandra_to_gcs.html#CassandraToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copy data from Cassandra to Google cloud storage in JSON format</p> |
| <p>Note: Arrays of arrays are not supported.</p> |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator.convert_map_type"> |
| <em class="property">classmethod </em><code class="descname">convert_map_type</code><span class="sig-paren">(</span><em>name</em>, <em>value</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/cassandra_to_gcs.html#CassandraToGoogleCloudStorageOperator.convert_map_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator.convert_map_type" title="Permalink to this definition">¶</a></dt> |
<dd><p>Converts a map to a repeated RECORD that contains two fields: ‘key’ and ‘value’,
each will be converted to its corresponding data type in BQ.</p>
| </dd></dl> |
| |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator.convert_tuple_type"> |
| <em class="property">classmethod </em><code class="descname">convert_tuple_type</code><span class="sig-paren">(</span><em>name</em>, <em>value</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/cassandra_to_gcs.html#CassandraToGoogleCloudStorageOperator.convert_tuple_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator.convert_tuple_type" title="Permalink to this definition">¶</a></dt> |
<dd><p>Converts a tuple to RECORD that contains n fields, each will be converted
to its corresponding data type in bq and will be named ‘field_&lt;index&gt;’, where
index is determined by the order of the tuple elements defined in cassandra.</p>
| </dd></dl> |
| |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator.convert_user_type"> |
| <em class="property">classmethod </em><code class="descname">convert_user_type</code><span class="sig-paren">(</span><em>name</em>, <em>value</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/cassandra_to_gcs.html#CassandraToGoogleCloudStorageOperator.convert_user_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.cassandra_to_gcs.CassandraToGoogleCloudStorageOperator.convert_user_type" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Converts a user type to RECORD that contains n fields, where n is the |
| number of attributes. Each element in the user type class will be converted to its |
| corresponding data type in BQ.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.databricks_operator.</code><code class="descname">DatabricksSubmitRunOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Submits a Spark job run to Databricks using the |
| <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">api/2.0/jobs/runs/submit</a> |
| API endpoint.</p> |
| <p>There are two ways to instantiate this operator.</p> |
| <p>In the first way, you can take the JSON payload that you typically use |
| to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint and pass it directly |
| to our <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> through the <code class="docutils literal notranslate"><span class="pre">json</span></code> parameter. |
| For example</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">json</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'new_cluster'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'spark_version'</span><span class="p">:</span> <span class="s1">'2.1.0-db3-scala2.11'</span><span class="p">,</span> |
| <span class="s1">'num_workers'</span><span class="p">:</span> <span class="mi">2</span> |
| <span class="p">},</span> |
| <span class="s1">'notebook_task'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'notebook_path'</span><span class="p">:</span> <span class="s1">'/Users/airflow@example.com/PrepareData'</span><span class="p">,</span> |
| <span class="p">},</span> |
| <span class="p">}</span> |
| <span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="s1">'notebook_run'</span><span class="p">,</span> <span class="n">json</span><span class="o">=</span><span class="n">json</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Another way to accomplish the same thing is to use the named parameters |
| of the <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> directly. Note that there is exactly |
| one named parameter for each top level parameter in the <code class="docutils literal notranslate"><span class="pre">runs/submit</span></code> |
| endpoint. In this method, your code would look like this:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">new_cluster</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'spark_version'</span><span class="p">:</span> <span class="s1">'2.1.0-db3-scala2.11'</span><span class="p">,</span> |
| <span class="s1">'num_workers'</span><span class="p">:</span> <span class="mi">2</span> |
| <span class="p">}</span> |
| <span class="n">notebook_task</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'notebook_path'</span><span class="p">:</span> <span class="s1">'/Users/airflow@example.com/PrepareData'</span><span class="p">,</span> |
| <span class="p">}</span> |
| <span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'notebook_run'</span><span class="p">,</span> |
| <span class="n">new_cluster</span><span class="o">=</span><span class="n">new_cluster</span><span class="p">,</span> |
| <span class="n">notebook_task</span><span class="o">=</span><span class="n">notebook_task</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>In the case where both the json parameter <strong>AND</strong> the named parameters |
| are provided, they will be merged together. If there are conflicts during the merge, |
| the named parameters will take precedence and override the top level <code class="docutils literal notranslate"><span class="pre">json</span></code> keys.</p> |
| <dl class="docutils"> |
| <dt>Currently the named parameters that <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> supports are</dt> |
| <dd><ul class="first last simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">notebook_task</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">new_cluster</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">libraries</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">run_name</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">timeout_seconds</span></code></li> |
| </ul> |
| </dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>json</strong> (<em>dict</em>) – <p>A JSON object containing API parameters which will be passed |
| directly to the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint. The other named parameters |
| (i.e. <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code>, <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code>..) to this operator will |
| be merged with this json dictionary if they are provided. |
| If there are conflicts during the merge, the named parameters will |
| take precedence and override the top level json keys. (templated)</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information about templating see <a class="reference internal" href="concepts.html#jinja-templating"><span class="std std-ref">Jinja Templating</span></a>. |
| <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">https://docs.databricks.com/api/latest/jobs.html#runs-submit</a></p> |
| </div> |
| </li> |
| <li><strong>spark_jar_task</strong> (<em>dict</em>) – <p>The main class and parameters for the JAR task. Note that |
| the actual JAR is specified in the <code class="docutils literal notranslate"><span class="pre">libraries</span></code>. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask">https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask</a></p> |
| </div> |
| </li> |
| <li><strong>notebook_task</strong> (<em>dict</em>) – <p>The notebook path and parameters for the notebook task. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask">https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask</a></p> |
| </div> |
| </li> |
| <li><strong>new_cluster</strong> (<em>dict</em>) – <p>Specs for a new cluster on which this task will be run. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster">https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster</a></p> |
| </div> |
| </li> |
| <li><strong>existing_cluster_id</strong> (<em>string</em>) – ID for existing cluster on which to run this task. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified. |
| This field will be templated.</li> |
| <li><strong>libraries</strong> (<em>list of dicts</em>) – <p>Libraries which this run will use. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary">https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary</a></p> |
| </div> |
| </li> |
| <li><strong>run_name</strong> (<em>string</em>) – The run name used for this task. |
| By default this will be set to the Airflow <code class="docutils literal notranslate"><span class="pre">task_id</span></code>. This <code class="docutils literal notranslate"><span class="pre">task_id</span></code> is a |
| required parameter of the superclass <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code>. |
| This field will be templated.</li> |
| <li><strong>timeout_seconds</strong> (<em>int32</em>) – The timeout for this run. By default a value of 0 is used |
| which means to have no timeout. |
| This field will be templated.</li> |
| <li><strong>databricks_conn_id</strong> (<em>string</em>) – The name of the Airflow connection to use. |
| By default and in the common case this will be <code class="docutils literal notranslate"><span class="pre">databricks_default</span></code>. To use |
| token based authentication, provide the key <code class="docutils literal notranslate"><span class="pre">token</span></code> in the extra field for the |
| connection.</li> |
| <li><strong>polling_period_seconds</strong> (<em>int</em>) – Controls the rate which we poll for the result of |
| this run. By default the operator will poll every 30 seconds.</li> |
| <li><strong>databricks_retry_limit</strong> (<em>int</em>) – Amount of times retry if the Databricks backend is |
| unreachable. Its value must be greater than or equal to 1.</li> |
| <li><strong>databricks_retry_delay</strong> (<em>float</em>) – Number of seconds to wait between retries (it |
| might be a floating point number).</li> |
| <li><strong>do_xcom_push</strong> (<em>boolean</em>) – Whether we should push run_id and run_page_url to xcom.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowJavaOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Java Cloud DataFlow batch job. The parameters of the operation |
| will be passed to the job.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail on job submission have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>jar</strong> (<em>string</em>) – The reference to a self executing DataFlow jar.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li> |
| <li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
<li><strong>job_class</strong> (<em>string</em>) – The name of the dataflow job class to be executed, it
is often not the main class configured in the dataflow jar file.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Both <code class="docutils literal notranslate"><span class="pre">jar</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> are templated so you can use variables in them.</p> |
| <p>Note that both |
| <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> will be merged to specify pipeline |
| execution parameter, and <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> is expected to save |
| high-level options, for instances, project and zone information, which |
| apply to all dataflow operators in the DAG.</p> |
| <p>It’s a good practice to define dataflow_* parameters in the default_args of the dag |
| like the project, zone and staging location.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'europe-west1-d'</span><span class="p">,</span> |
| <span class="s1">'stagingLocation'</span><span class="p">:</span> <span class="s1">'gs://my-staging-bucket/staging/'</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You need to pass the path to your dataflow as a file reference with the <code class="docutils literal notranslate"><span class="pre">jar</span></code> |
| parameter, the jar needs to be a self executing jar (see documentation here: |
| <a class="reference external" href="https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar">https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar</a>). |
| Use <code class="docutils literal notranslate"><span class="pre">options</span></code> to pass on options to your job.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataflow_example'</span><span class="p">,</span> |
| <span class="n">jar</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar'</span><span class="p">,</span> |
| <span class="n">options</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'autoscalingAlgorithm'</span><span class="p">:</span> <span class="s1">'BASIC'</span><span class="p">,</span> |
| <span class="s1">'maxNumWorkers'</span><span class="p">:</span> <span class="s1">'50'</span><span class="p">,</span> |
| <span class="s1">'start'</span><span class="p">:</span> <span class="s1">'{{ds}}'</span><span class="p">,</span> |
| <span class="s1">'partitionType'</span><span class="p">:</span> <span class="s1">'DAY'</span><span class="p">,</span> |
| <span class="s1">'labels'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'foo'</span> <span class="p">:</span> <span class="s1">'bar'</span><span class="p">}</span> |
| <span class="p">},</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp-airflow-service-account'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataflowTemplateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Templated Cloud DataFlow batch job. The parameters of the operation |
| will be passed to the job.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template</strong> (<em>string</em>) – The reference to the DataFlow template.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job environment options.</li> |
| <li><strong>parameters</strong> (<em>dict</em>) – Map of job specific parameters for the template.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>It’s a good practice to define dataflow_* parameters in the default_args of the dag |
| like the project, zone and staging location.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a> |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a></p> |
| </div> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'europe-west1-d'</span><span class="p">,</span> |
| <span class="s1">'tempLocation'</span><span class="p">:</span> <span class="s1">'gs://my-staging-bucket/staging/'</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You need to pass the path to your dataflow template as a file reference with the |
| <code class="docutils literal notranslate"><span class="pre">template</span></code> parameter. Use <code class="docutils literal notranslate"><span class="pre">parameters</span></code> to pass on parameters to your job. |
| Use <code class="docutils literal notranslate"><span class="pre">environment</span></code> to pass on runtime environment variables to your job.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataflowTemplateOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataflow_example'</span><span class="p">,</span> |
| <span class="n">template</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}'</span><span class="p">,</span> |
| <span class="n">parameters</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'inputFile'</span><span class="p">:</span> <span class="s2">"gs://bucket/input/my_input.txt"</span><span class="p">,</span> |
| <span class="s1">'outputFile'</span><span class="p">:</span> <span class="s2">"gs://bucket/output/my_output.txt"</span> |
| <span class="p">},</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp-airflow-service-account'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">template</span></code>, <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">parameters</span></code> are templated so you can |
| use variables in them.</p> |
| <p>Note that <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> is expected to save high-level options |
| for project information, which apply to all dataflow operators in the DAG.</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a> |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a> |
| For more detail on job template execution have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/templates/executing-templates">https://cloud.google.com/dataflow/docs/templates/executing-templates</a></p> |
| </div> |
| </div></blockquote> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowPythonOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Launching Cloud Dataflow jobs written in python. Note that both |
| dataflow_default_options and options will be merged to specify pipeline |
| execution parameter, and dataflow_default_options is expected to save |
| high-level options, for instance, project and zone information, which |
| apply to all dataflow operators in the DAG.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail on job submission have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>py_file</strong> (<em>string</em>) – Reference to the python dataflow pipeline file.py, e.g., |
| /some/local/file/path/to/your/python/pipeline/file.</li> |
| <li><strong>py_options</strong> – Additional python options.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li> |
| <li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the python dataflow job.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Create a new cluster on Google Cloud Dataproc. The operator will wait until the |
| creation is successful or an error occurs in the creation process.</p> |
| <p>The parameters allow to configure the cluster. Please refer to</p> |
| <p><a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters</a></p> |
| <p>for a detailed explanation on the different parameters. Most of the configuration |
| parameters detailed in the link are available as a parameter to this operator.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster to create. (templated)</li> |
| <li><strong>project_id</strong> (<em>str</em>) – The ID of the google cloud project in which |
| to create the cluster. (templated)</li> |
| <li><strong>num_workers</strong> (<em>int</em>) – The # of workers to spin up. If set to zero will |
| spin up cluster in a single node mode</li> |
| <li><strong>storage_bucket</strong> (<em>string</em>) – The storage bucket to use, setting to None lets dataproc |
| generate a custom one for you</li> |
| <li><strong>init_actions_uris</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – List of GCS uri’s containing |
| dataproc initialization scripts</li> |
| <li><strong>init_action_timeout</strong> (<em>string</em>) – Amount of time executable scripts in |
| init_actions_uris has to complete</li> |
| <li><strong>metadata</strong> (<em>dict</em>) – dict of key-value google compute engine metadata entries |
| to add to all instances</li> |
| <li><strong>image_version</strong> (<em>string</em>) – the version of software inside the Dataproc cluster</li> |
| <li><strong>custom_image</strong> – custom Dataproc image for more info see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/guides/dataproc-images">https://cloud.google.com/dataproc/docs/guides/dataproc-images</a></li> |
| <li><strong>properties</strong> (<em>dict</em>) – dict of properties to set on |
| config files (e.g. spark-defaults.conf), see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig</a></li> |
| <li><strong>master_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the master node</li> |
| <li><strong>master_disk_type</strong> (<em>string</em>) – Type of the boot disk for the master node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li> |
| <li><strong>master_disk_size</strong> (<em>int</em>) – Disk size for the master node</li> |
| <li><strong>worker_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the worker nodes</li> |
| <li><strong>worker_disk_type</strong> (<em>string</em>) – Type of the boot disk for the worker node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li> |
| <li><strong>worker_disk_size</strong> (<em>int</em>) – Disk size for the worker nodes</li> |
| <li><strong>num_preemptible_workers</strong> (<em>int</em>) – The # of preemptible worker nodes to spin up</li> |
| <li><strong>labels</strong> (<em>dict</em>) – dict of labels to add to the cluster</li> |
| <li><strong>zone</strong> (<em>string</em>) – The zone where the cluster will be located. (templated)</li> |
| <li><strong>network_uri</strong> (<em>string</em>) – The network uri to be used for machine communication, cannot be |
| specified with subnetwork_uri</li> |
| <li><strong>subnetwork_uri</strong> (<em>string</em>) – The subnetwork uri to be used for machine communication, |
| cannot be specified with network_uri</li> |
| <li><strong>internal_ip_only</strong> (<em>bool</em>) – If true, all instances in the cluster will only |
| have internal IP addresses. This can only be enabled for subnetwork |
| enabled networks</li> |
| <li><strong>tags</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The GCE tags to add to all instances</li> |
| <li><strong>region</strong> – leave as ‘global’, might become relevant in the future. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>service_account</strong> (<em>string</em>) – The service account of the dataproc instances.</li> |
| <li><strong>service_account_scopes</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The URIs of service account scopes to be included.</li> |
| <li><strong>idle_delete_ttl</strong> (<em>int</em>) – The longest duration that cluster would keep alive while |
| staying idle. Passing this threshold will cause cluster to be auto-deleted. |
| A duration in seconds.</li> |
| <li><strong>auto_delete_time</strong> (<em>datetime.datetime</em>) – The time when cluster will be auto-deleted.</li> |
| <li><strong>auto_delete_ttl</strong> (<em>int</em>) – The life duration of cluster, the cluster will be |
| auto-deleted at the end of this duration. |
| A duration in seconds. (If auto_delete_time is set this parameter will be ignored)</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">custom_image: string</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterScaleOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Scale, up or down, a cluster on Google Cloud Dataproc. |
| The operator will wait until the cluster is re-scaled.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataprocClusterScaleOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_scale'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">cluster_name</span><span class="o">=</span><span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="n">num_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">num_preemptible_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">graceful_decommission_timeout</span><span class="o">=</span><span class="s1">'1h'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail about scaling clusters have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters">https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to scale. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The region for the dataproc cluster. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>num_workers</strong> (<em>int</em>) – The new number of workers</li> |
| <li><strong>num_preemptible_workers</strong> (<em>int</em>) – The new number of preemptible workers</li> |
| <li><strong>graceful_decommission_timeout</strong> (<em>string</em>) – Timeout for graceful YARN decommissioning. |
| Maximum value is 1d</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Delete a cluster on Google Cloud Dataproc. The operator will wait until the |
| cluster is destroyed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to delete. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation |
| will be passed to the cluster.</p> |
| <p>It’s a good practice to define dataproc_* parameters in the default_args of the dag |
| like the cluster name and UDFs.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'cluster_name'</span><span class="p">:</span> <span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="s1">'dataproc_pig_jars'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s1">'gs://example/udf/jar/datafu/1.2.0/datafu.jar'</span><span class="p">,</span> |
| <span class="s1">'gs://example/udf/jar/gpig/1.2/gpig.jar'</span> |
| <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You can pass a pig script as string or file reference. Use variables to pass on |
| variables for the pig script to be resolved on the cluster or use the parameters to |
| be resolved in the script as template parameters.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataProcPigOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_pig'</span><span class="p">,</span> |
| <span class="n">query</span><span class="o">=</span><span class="s1">'a_pig_script.pig'</span><span class="p">,</span> |
| <span class="n">variables</span><span class="o">=</span><span class="p">{</span><span class="s1">'out'</span><span class="p">:</span> <span class="s1">'gs://example/output/{{ds}}'</span><span class="p">},</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail on job submission have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs">https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query |
| file (pg or pig extension). (templated)</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a pig script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution data, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_pig_properties</strong> (<em>dict</em>) – Map for the Pig properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_pig_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for |
| UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHiveOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Hive query Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension).</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The URI of a Hive script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query.</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This name by default |
| is the task_id appended with the execution date, but can be templated. The |
| name will always be appended with a random number to avoid name clashes.</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li> |
| <li><strong>dataproc_hive_properties</strong> (<em>dict</em>) – Map for the Hive properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_hive_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for |
| UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkSqlOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Spark SQL query Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension). (templated)</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The URI of a Spark SQL script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Spark Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or |
| the main_class, not both together).</li> |
| <li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both |
| together).</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHadoopOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Hadoop Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or |
| the main_class, not both together).</li> |
| <li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both |
| together).</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_hadoop_properties</strong> (<em>dict</em>) – Map for the Hadoop properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_hadoop_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPySparkOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a PySpark Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>main</strong> (<em>string</em>) – [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main |
| Python file to use as the driver. Must be a .py file.</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>pyfiles</strong> (<em>list</em>) – List of Python files to pass to the PySpark framework. |
| Supported file types: .py, .egg, and .zip</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li> |
| <li><strong>dataproc_pyspark_properties</strong> (<em>dict</em>) – Map for the PySpark properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_pyspark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateBaseOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateBaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait |
| until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template_id</strong> (<em>string</em>) – The id of the template. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the template runs</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateInlineOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will |
| wait until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template</strong> (<em>map</em>) – The template contents. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the template runs</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_export_operator.</code><code class="descname">DatastoreExportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Export entities from Google Cloud Datastore to Cloud Storage</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – name of the cloud storage bucket to backup data</li> |
| <li><strong>namespace</strong> (<em>str</em>) – optional namespace path in the specified Cloud Storage bucket |
| to backup data. If this namespace does not exist in GCS, it will be created.</li> |
| <li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the Datastore connection id to use</li> |
| <li><strong>cloud_storage_conn_id</strong> (<em>string</em>) – the name of the cloud storage connection id to |
| force-write backup</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in the |
| export, refer to |
| <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li> |
| <li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li> |
| <li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for |
| execution status again</li> |
| <li><strong>overwrite_existing</strong> (<em>bool</em>) – if the storage bucket + namespace is not empty, it will be |
| emptied prior to exports. This enables overwriting existing backups.</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_import_operator.</code><code class="descname">DatastoreImportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Import entities from Cloud Storage to Google Cloud Datastore</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – container in Cloud Storage to store data</li> |
| <li><strong>file</strong> (<em>string</em>) – path of the backup metadata file in the specified Cloud Storage bucket. |
| It should have the extension .overall_export_metadata</li> |
| <li><strong>namespace</strong> (<em>str</em>) – optional namespace of the backup metadata file in |
| the specified Cloud Storage bucket.</li> |
| <li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in |
| the export, refer to |
| <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li> |
| <li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li> |
| <li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the connection id to use</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for |
| execution status again</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.discord_webhook_operator.DiscordWebhookOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.discord_webhook_operator.</code><code class="descname">DiscordWebhookOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/discord_webhook_operator.html#DiscordWebhookOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.discord_webhook_operator.DiscordWebhookOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.http_operator.SimpleHttpOperator" title="airflow.operators.http_operator.SimpleHttpOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.http_operator.SimpleHttpOperator</span></code></a></p> |
| <p>This operator allows you to post messages to Discord using incoming webhooks. |
| Takes a Discord connection ID with a default relative webhook endpoint. The |
| default endpoint can be overridden using the webhook_endpoint parameter |
| (<a class="reference external" href="https://discordapp.com/developers/docs/resources/webhook">https://discordapp.com/developers/docs/resources/webhook</a>).</p> |
| <p>Each Discord webhook can be pre-configured to use a specific username and |
| avatar_url. You can override these defaults in this operator.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>http_conn_id</strong> (<em>str</em>) – Http connection ID with host as “<a class="reference external" href="https://discord.com/api/">https://discord.com/api/</a>” and |
| default webhook endpoint in the extra field in the form of |
| {“webhook_endpoint”: “webhooks/{webhook.id}/{webhook.token}”}</li> |
| <li><strong>webhook_endpoint</strong> (<em>str</em>) – Discord webhook endpoint in the form of |
| “webhooks/{webhook.id}/{webhook.token}”</li> |
| <li><strong>message</strong> (<em>str</em>) – The message you want to send to your Discord channel |
| (max 2000 characters). (templated)</li> |
| <li><strong>username</strong> (<em>str</em>) – Override the default username of the webhook. (templated)</li> |
| <li><strong>avatar_url</strong> (<em>str</em>) – Override the default avatar of the webhook</li> |
| <li><strong>tts</strong> (<em>bool</em>) – Whether this is a text-to-speech message</li> |
| <li><strong>proxy</strong> (<em>str</em>) – Proxy to use to make the Discord webhook call</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.discord_webhook_operator.DiscordWebhookOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/discord_webhook_operator.html#DiscordWebhookOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.discord_webhook_operator.DiscordWebhookOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Call the DiscordWebhookHook to post message</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.druid_operator.DruidOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.druid_operator.</code><code class="descname">DruidOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/druid_operator.html#DruidOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.druid_operator.DruidOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Allows to submit a task directly to druid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>json_index_file</strong> (<em>str</em>) – The filepath to the druid index specification</li> |
| <li><strong>druid_ingest_conn_id</strong> (<em>str</em>) – The connection id of the Druid overlord which |
| accepts index jobs</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.ecs_operator.ECSOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.ecs_operator.</code><code class="descname">ECSOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a task on AWS EC2 Container Service</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>task_definition</strong> (<em>str</em>) – the task definition name on EC2 Container Service</li> |
| <li><strong>cluster</strong> (<em>str</em>) – the cluster name on EC2 Container Service</li> |
| <li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive (templated): |
| <a class="reference external" href="http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task">http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task</a></li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None, |
| credential boto3 strategy will be used |
| (<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li> |
| <li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook. |
| Override the region_name in connection (if provided)</li> |
| <li><strong>launch_type</strong> (<em>str</em>) – the launch type on which to run your task (‘EC2’ or ‘FARGATE’)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_add_steps_operator.</code><code class="descname">EmrAddStepsOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>An operator that adds steps to an existing EMR job_flow.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> (<em>str</em>) – id of the JobFlow to add steps to. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| <li><strong>steps</strong> (<em>list</em>) – boto3 style steps to be added to the jobflow. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_create_job_flow_operator.</code><code class="descname">EmrCreateJobFlowOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates an EMR JobFlow, reading the config from the EMR connection. |
| A dictionary of JobFlow overrides can be passed that override |
| the config from the connection.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| <li><strong>emr_conn_id</strong> (<em>str</em>) – emr connection to use</li> |
| <li><strong>job_flow_overrides</strong> (<em>dict</em>) – boto3 style arguments to override |
| emr_connection extra. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_terminate_job_flow_operator.</code><code class="descname">EmrTerminateJobFlowOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator to terminate EMR JobFlows.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> (<em>str</em>) – id of the JobFlow to terminate. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_gcs.</code><code class="descname">FileToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Uploads a file to Google Cloud Storage. |
| Optionally can compress the file for upload.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>src</strong> (<em>string</em>) – Path to the local file. (templated)</li> |
| <li><strong>dst</strong> (<em>string</em>) – Destination path within the specified bucket. (templated)</li> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to. (templated)</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The Airflow connection ID to upload with</li> |
| <li><strong>mime_type</strong> (<em>string</em>) – The mime-type string</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any</li> |
| <li><strong>gzip</strong> (<em>bool</em>) – Allows for file to be compressed and uploaded as gzip</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads the file to Google cloud storage</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_wasb.</code><code class="descname">FileToWasbOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Uploads a file to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load. (templated)</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container. (templated)</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob. (templated)</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>load_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.load_file()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Blob Storage.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_download_operator.</code><code class="descname">GoogleCloudStorageDownloadOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Downloads a file from Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is. (templated)</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to download in the Google cloud |
| storage bucket. (templated)</li> |
| <li><strong>filename</strong> (<em>string</em>) – The file path on the local file system (where the |
| operator is being executed) that the file should be downloaded to. (templated) |
| If no filename passed, the downloaded data will not be stored on the local file |
| system.</li> |
| <li><strong>store_to_xcom_key</strong> (<em>string</em>) – If this param is set, the operator will push |
| the contents of the downloaded file to XCom with the key set in this |
| parameter. If not set, the downloaded data will not be pushed to XCom. (templated)</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_list_operator.</code><code class="descname">GoogleCloudStorageListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all objects from the bucket with the given string prefix and delimiter in name.</p> |
| <dl class="docutils"> |
| <dt>This operator returns a python list with the names of the objects which can be used by</dt> |
| <dd><cite>xcom</cite> in the downstream task.</dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with |
| this prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – The delimiter by which you want to filter the objects. (templated) |
| For example, to list the CSV files in a directory in GCS you would use |
| delimiter=’.csv’.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would list all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">GCS_Files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'GCS_Files'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'sales/sales-2017/'</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="s1">'.avro'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_operator.</code><code class="descname">GoogleCloudStorageCreateBucketOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new bucket. Google Cloud Storage uses a flat namespace, |
| so you can’t create a bucket with a name that is already in use.</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information, see Bucket Naming Guidelines: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p> |
| </div> |
| </div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket. (templated)</li> |
| <li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored |
| and determines the SLA and the cost of storage (templated). Values include</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li> |
| </ul> |
| <p>If this value is not specified when the bucket is |
| created, it will default to STANDARD.</p> |
| </li> |
| <li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. (templated) |
| Object data for objects in the bucket resides in physical storage |
| within this region. Defaults to US.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p> |
| </div> |
| </li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project. (templated)</li> |
| <li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would create a new bucket <code class="docutils literal notranslate"><span class="pre">test-bucket</span></code> |
| with <code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code> storage class in <code class="docutils literal notranslate"><span class="pre">EU</span></code> region</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateBucket</span> <span class="o">=</span> <span class="n">GoogleCloudStorageCreateBucketOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'CreateNewBucket'</span><span class="p">,</span> |
| <span class="n">bucket_name</span><span class="o">=</span><span class="s1">'test-bucket'</span><span class="p">,</span> |
| <span class="n">storage_class</span><span class="o">=</span><span class="s1">'MULTI_REGIONAL'</span><span class="p">,</span> |
| <span class="n">location</span><span class="o">=</span><span class="s1">'EU'</span><span class="p">,</span> |
| <span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">'env'</span><span class="p">:</span> <span class="s1">'dev'</span><span class="p">,</span> <span class="s1">'team'</span><span class="p">:</span> <span class="s1">'airflow'</span><span class="p">},</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_bq.</code><code class="descname">GoogleCloudStorageToBigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Loads files from Google cloud storage into BigQuery.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to load from. (templated)</li> |
| <li><strong>source_objects</strong> (<em>list of str</em>) – List of Google cloud storage URIs to load from. (templated) |
| If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li> |
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into. If &lt;project&gt; is not included,
| project will be the project defined in the connection json. (templated)</li> |
| <li><strong>schema_fields</strong> (<em>list</em>) – If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load</a> |
| Should not be set when source_format is ‘DATASTORE_BACKUP’.</li> |
| <li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that |
| contains the schema for the table. (templated)</li> |
| <li><strong>source_format</strong> (<em>string</em>) – File format to export.</li> |
| <li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source. |
| Possible values include GZIP and NONE. |
| The default value is NONE. |
| This setting is ignored for Google Cloud Bigtable, |
| Google Cloud Datastore backups and Avro formats.</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li> |
| <li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li> |
| <li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when loading from a CSV.</li> |
| <li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can |
| ignore when running the job.</li> |
| <li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li> |
| <li><strong>ignore_unknown_values</strong> (<em>bool</em>) – [Optional] Indicates if BigQuery should allow |
| extra values that are not represented in the table schema. |
| If true, the extra values are ignored. If false, records with extra columns |
| are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result.</li> |
| <li><strong>allow_quoted_newlines</strong> (<em>bool</em>) – Whether to allow quoted newlines (true) or not (false).</li> |
| <li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns. |
| The missing values are treated as nulls. If false, records with missing trailing |
| columns are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result. Only applicable to CSV, ignored |
| for other formats.</li> |
| <li><strong>max_id_key</strong> (<em>string</em>) – If set, the name of a column in the BigQuery table |
| that’s to be loaded. This will be used to select the MAX value from |
| BigQuery after the load occurs. The results will be returned by the |
| execute() command, which in turn gets stored in XCom for future |
operators to use. This can be helpful with incremental loads — during
future executions, you can pick up from the max ID.</li>
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>schema_update_options</strong> (<em>list</em>) – Allows the schema of the destination |
| table to be updated as a side effect of the load job.</li> |
| <li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li> |
| <li><strong>external_table</strong> (<em>bool</em>) – Flag to specify if the destination table should be |
| a BigQuery external table. Default Value is False.</li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications. |
| Note that ‘field’ is not available in concurrency with |
| dataset.table$partition.</li> |
| <li><strong>cluster_fields</strong> (<em>list of str</em>) – Request that the result of this load be stored sorted |
| by one or more columns. This is only available in conjunction with |
| time_partitioning. The order of columns given determines the sort order. |
| Not applicable for external tables.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies objects from a bucket to another, with renaming if requested.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The source Google cloud storage bucket where the |
| object is. (templated)</li> |
| <li><strong>source_object</strong> (<em>string</em>) – <p>The source name of the object to copy in the Google cloud |
| storage bucket. (templated) |
| If wildcards are used in this argument:</p> |
| <blockquote> |
| <div>You can use only one wildcard for objects (filenames) within your |
| bucket. The wildcard can appear inside the object name or at the |
| end of the object name. Appending a wildcard to the bucket name is |
| unsupported.</div></blockquote> |
| </li> |
| <li><strong>destination_bucket</strong> (<em>string</em>) – The destination Google cloud storage bucket |
| where the object should be. (templated)</li> |
| <li><strong>destination_object</strong> (<em>string</em>) – The destination name of the object in the |
| destination Google cloud storage bucket. (templated) |
| If a wildcard is supplied in the source_object argument, this is the |
| prefix that will be prepended to the final destination objects’ paths. |
| Note that the source path’s part before the wildcard will be removed; |
| if it needs to be retained it should be appended to destination_object. |
| For example, with prefix <code class="docutils literal notranslate"><span class="pre">foo/*</span></code> and destination_object <code class="docutils literal notranslate"><span class="pre">blah/</span></code>, the |
| file <code class="docutils literal notranslate"><span class="pre">foo/baz</span></code> will be copied to <code class="docutils literal notranslate"><span class="pre">blah/baz</span></code>; to retain the prefix write |
| the destination_object as e.g. <code class="docutils literal notranslate"><span class="pre">blah/foo</span></code>, in which case the copied file |
| will be named <code class="docutils literal notranslate"><span class="pre">blah/foo/baz</span></code>.</li> |
| <li><strong>move_object</strong> (<em>bool</em>) – When move object is True, the object is moved instead |
| of copied to the new location. This is the equivalent of a mv command |
| as opposed to a cp command.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Examples</strong>:</dt> |
| <dd><p class="first">The following Operator would copy a single file named |
| <code class="docutils literal notranslate"><span class="pre">sales/sales-2017/january.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the file named |
<code class="docutils literal notranslate"><span class="pre">copied_sales/2017/january-backup.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_single_file'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/january.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="s1">'copied_sales/2017/january-backup.avro'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would copy all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the |
| <code class="docutils literal notranslate"><span class="pre">copied_sales/2017</span></code> folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_files'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/*.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="s1">'copied_sales/2017/'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would move all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the |
| same folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket, deleting the original files in the |
| process.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">move_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'move_files'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/*.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">move_object</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_gcs_transfer_operator.GoogleCloudStorageToGoogleCloudStorageTransferOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs_transfer_operator.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageTransferOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs_transfer_operator.html#GoogleCloudStorageToGoogleCloudStorageTransferOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs_transfer_operator.GoogleCloudStorageToGoogleCloudStorageTransferOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies objects from a bucket to another using the GCP Storage Transfer |
| Service.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>str</em>) – The source Google cloud storage bucket where the |
| object is. (templated)</li> |
| <li><strong>destination_bucket</strong> (<em>str</em>) – The destination Google cloud storage bucket |
| where the object should be. (templated)</li> |
| <li><strong>project_id</strong> (<em>str</em>) – The ID of the Google Cloud Platform Console project that |
| owns the job</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional connection ID to use when connecting to Google Cloud |
| Storage.</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>description</strong> (<em>str</em>) – Optional transfer service job description</li> |
| <li><strong>schedule</strong> (<em>dict</em>) – Optional transfer service schedule; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs</a>. |
| If not set, run transfer job once as soon as the operator runs</li> |
| <li><strong>object_conditions</strong> (<em>dict</em>) – Optional transfer service object conditions; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions</a></li> |
| <li><strong>transfer_options</strong> (<em>dict</em>) – Optional transfer service transfer options; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions</a></li> |
| <li><strong>wait</strong> (<em>bool</em>) – Wait for transfer to finish; defaults to <cite>True</cite></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">gcs_to_gcs_transfer_op</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageTransferOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'gcs_to_gcs_transfer_example'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'my-source-bucket'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'my-destination-bucket'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_s3.GoogleCloudStorageToS3Operator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_s3.</code><code class="descname">GoogleCloudStorageToS3Operator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_s3.html#GoogleCloudStorageToS3Operator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_s3.GoogleCloudStorageToS3Operator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator" title="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator</span></code></a></p> |
| <p>Synchronizes a Google Cloud Storage bucket with an S3 bucket.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google Cloud Storage bucket to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with |
| this prefix. (templated)</li> |
<li><strong>delimiter</strong> (<em>string</em>) – The delimiter by which you want to filter the objects. (templated)
For example, to list the CSV files in a directory in GCS you would use
delimiter=’.csv’.</li>
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google Cloud Storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>dest_aws_conn_id</strong> (<em>str</em>) – The destination S3 connection</li> |
| <li><strong>dest_s3_key</strong> (<em>str</em>) – The base S3 key to be used to store the files. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
<tr class="field-even field"><th class="field-name" colspan="2">Param dest_verify:</th></tr>
| <tr class="field-even field"><td> </td><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
<dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt>
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.hipchat_operator.HipChatAPIOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.hipchat_operator.</code><code class="descname">HipChatAPIOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/hipchat_operator.html#HipChatAPIOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.hipchat_operator.HipChatAPIOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Base HipChat Operator. |
| All derived HipChat operators reference from HipChat’s official REST API documentation |
| at <a class="reference external" href="https://www.hipchat.com/docs/apiv2">https://www.hipchat.com/docs/apiv2</a>. Before using any HipChat API operators you need |
| to get an authentication token at <a class="reference external" href="https://www.hipchat.com/docs/apiv2/auth">https://www.hipchat.com/docs/apiv2/auth</a>. |
| In the future additional HipChat operators will be derived from this class as well.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>token</strong> (<em>str</em>) – HipChat REST API authentication token</li> |
| <li><strong>base_url</strong> (<em>str</em>) – HipChat REST API base url.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.hipchat_operator.HipChatAPIOperator.prepare_request"> |
| <code class="descname">prepare_request</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/hipchat_operator.html#HipChatAPIOperator.prepare_request"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.hipchat_operator.HipChatAPIOperator.prepare_request" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Used by the execute function. Set the request method, url, and body of HipChat’s |
| REST API call. |
| Override in child class. Each HipChatAPI child operator is responsible for having |
| a prepare_request method call which sets self.method, self.url, and self.body.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.hipchat_operator.HipChatAPISendRoomNotificationOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.hipchat_operator.</code><code class="descname">HipChatAPISendRoomNotificationOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/hipchat_operator.html#HipChatAPISendRoomNotificationOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.hipchat_operator.HipChatAPISendRoomNotificationOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.hipchat_operator.HipChatAPIOperator" title="airflow.contrib.operators.hipchat_operator.HipChatAPIOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.hipchat_operator.HipChatAPIOperator</span></code></a></p> |
| <p>Send notification to a specific HipChat room. |
| More info: <a class="reference external" href="https://www.hipchat.com/docs/apiv2/method/send_room_notification">https://www.hipchat.com/docs/apiv2/method/send_room_notification</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>room_id</strong> (<em>str</em>) – Room in which to send notification on HipChat. (templated)</li> |
| <li><strong>message</strong> (<em>str</em>) – The message body. (templated)</li> |
| <li><strong>frm</strong> (<em>str</em>) – Label to be shown in addition to sender’s name</li> |
| <li><strong>message_format</strong> (<em>str</em>) – How the notification is rendered: html or text</li> |
| <li><strong>color</strong> (<em>str</em>) – Background color of the msg: yellow, green, red, purple, gray, or random</li> |
| <li><strong>attach_to</strong> (<em>str</em>) – The message id to attach this notification to</li> |
| <li><strong>notify</strong> (<em>bool</em>) – Whether this message should trigger a user notification</li> |
| <li><strong>card</strong> (<em>dict</em>) – HipChat-defined card object</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.hipchat_operator.HipChatAPISendRoomNotificationOperator.prepare_request"> |
| <code class="descname">prepare_request</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/hipchat_operator.html#HipChatAPISendRoomNotificationOperator.prepare_request"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.hipchat_operator.HipChatAPISendRoomNotificationOperator.prepare_request" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Used by the execute function. Set the request method, url, and body of HipChat’s |
| REST API call. |
| Override in child class. Each HipChatAPI child operator is responsible for having |
| a prepare_request method call which sets self.method, self.url, and self.body.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.hive_to_dynamodb.HiveToDynamoDBTransferOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.hive_to_dynamodb.</code><code class="descname">HiveToDynamoDBTransferOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/hive_to_dynamodb.html#HiveToDynamoDBTransferOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.hive_to_dynamodb.HiveToDynamoDBTransferOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from Hive to DynamoDB, note that for now the data is loaded |
| into memory before being pushed to DynamoDB, so this operator should |
| be used for smallish amount of data.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – SQL query to execute against the hive database. (templated)</li> |
| <li><strong>table_name</strong> (<em>str</em>) – target DynamoDB table</li> |
| <li><strong>table_keys</strong> (<em>list</em>) – partition key and sort key</li> |
| <li><strong>pre_process</strong> (<em>function</em>) – implement pre-processing of source data</li> |
| <li><strong>pre_process_args</strong> (<em>list</em>) – list of pre_process function arguments</li> |
| <li><strong>pre_process_kwargs</strong> (<em>dict</em>) – dict of pre_process function arguments</li> |
| <li><strong>region_name</strong> (<em>str</em>) – aws region name (example: us-east-1)</li> |
| <li><strong>schema</strong> (<em>str</em>) – hive database schema</li> |
| <li><strong>hiveserver2_conn_id</strong> (<em>str</em>) – source hive connection</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.jira_operator.JiraOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.jira_operator.</code><code class="descname">JiraOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/jira_operator.html#JiraOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.jira_operator.JiraOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>JiraOperator to interact and perform action on Jira issue tracking system. |
| This operator is designed to use Jira Python SDK: <a class="reference external" href="http://jira.readthedocs.io">http://jira.readthedocs.io</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>jira_conn_id</strong> (<em>str</em>) – reference to a pre-defined Jira Connection</li> |
| <li><strong>jira_method</strong> (<em>str</em>) – method name from Jira Python SDK to be called</li> |
| <li><strong>jira_method_args</strong> (<em>dict</em>) – required method parameters for the jira_method. (templated)</li> |
| <li><strong>result_processor</strong> (<em>function</em>) – function to further process the response from Jira</li> |
| <li><strong>get_jira_resource_method</strong> (<em>function</em>) – function or operator to get jira resource |
| on which the provided jira_method will be executed</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.kubernetes_pod_operator.</code><code class="descname">KubernetesPodOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/kubernetes_pod_operator.html#KubernetesPodOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.kubernetes_pod_operator.KubernetesPodOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a task in a Kubernetes Pod</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>image</strong> (<em>str</em>) – Docker image you wish to launch. Defaults to dockerhub.io, |
| but fully qualified URLS will point to custom repositories</li> |
| <li><strong>cmds</strong> (<em>list of str</em>) – entrypoint of the container. (templated) |
| The docker image’s entrypoint is used if this is not provided.</li> |
| <li><strong>arguments</strong> (<em>list of str</em>) – arguments to the entrypoint. (templated) |
| The docker image’s CMD is used if this is not provided.</li> |
| <li><strong>image_pull_policy</strong> (<em>str</em>) – Specify a policy to cache or always pull an image</li> |
| <li><strong>image_pull_secrets</strong> (<em>str</em>) – Any image pull secrets to be given to the pod. |
| If more than one secret is required, provide a |
| comma separated list: secret_a,secret_b</li> |
| <li><strong>volume_mounts</strong> (<em>list of VolumeMount</em>) – volumeMounts for launched pod</li> |
| <li><strong>volumes</strong> (<em>list of Volume</em>) – volumes for launched pod. Includes ConfigMaps and PersistentVolumes</li> |
| <li><strong>labels</strong> (<em>dict</em>) – labels to apply to the Pod</li> |
| <li><strong>startup_timeout_seconds</strong> (<em>int</em>) – timeout in seconds to startup the pod</li> |
| <li><strong>name</strong> (<em>str</em>) – name of the task you want to run, |
| will be used to generate a pod id</li> |
| <li><strong>env_vars</strong> (<em>dict</em>) – Environment variables initialized in the container. (templated)</li> |
| <li><strong>secrets</strong> (<em>list of Secret</em>) – Kubernetes secrets to inject in the container, |
| They can be exposed as environment vars or files in a volume.</li> |
| <li><strong>in_cluster</strong> (<em>bool</em>) – run kubernetes client with in_cluster configuration</li> |
| <li><strong>cluster_context</strong> (<em>string</em>) – context that points to kubernetes cluster. |
| Ignored when in_cluster is True. If None, current-context is used.</li> |
| <li><strong>get_logs</strong> (<em>bool</em>) – get the stdout of the container as logs of the tasks</li> |
| <li><strong>affinity</strong> (<em>dict</em>) – A dict containing a group of affinity scheduling rules</li> |
| <li><strong>node_selectors</strong> (<em>dict</em>) – A dict containing a group of scheduling rules</li> |
| <li><strong>config_file</strong> (<em>str</em>) – The path to the Kubernetes config file</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – If xcom_push is True, the content of the file |
| /airflow/xcom/return.json in the container will also be pushed to an |
| XCom when the container completes.</li> |
| <li><strong>hostnetwork</strong> (<em>bool</em>) – If True enable host networking on the pod</li> |
| <li><strong>tolerations</strong> (<em>list tolerations</em>) – A list of kubernetes tolerations</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Param:</th><td class="field-body"><p class="first">namespace: the namespace to run within kubernetes</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">namespace: str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineBatchPredictionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Google Cloud ML Engine prediction job.</p> |
| <p>NOTE: For model origin, users should consider exactly one from the |
| three options below: |
| 1. Populate ‘uri’ field only, which should be a GCS location that |
| points to a tensorflow savedModel directory. |
| 2. Populate ‘model_name’ field only, which refers to an existing |
| model, and the default version of the model will be used. |
| 3. Populate both ‘model_name’ and ‘version_name’ fields, which |
| refers to a specific version of a specific model.</p> |
| <p>In options 2 and 3, both model and version name should contain the |
| minimal identifier. For instance, call</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MLEngineBatchPredictionOperator</span><span class="p">(</span> |
| <span class="o">...</span><span class="p">,</span> |
| <span class="n">model_name</span><span class="o">=</span><span class="s1">'my_model'</span><span class="p">,</span> |
| <span class="n">version_name</span><span class="o">=</span><span class="s1">'my_version'</span><span class="p">,</span> |
| <span class="o">...</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>if the desired model version is |
| “projects/my_project/models/my_model/versions/my_version”.</p> |
| <p>See <a class="reference external" href="https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs">https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs</a> |
| for further documentation on the parameters.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name where the |
| prediction job is submitted. (templated)</li> |
| <li><strong>job_id</strong> (<em>string</em>) – A unique id for the prediction job on Google Cloud |
| ML Engine. (templated)</li> |
| <li><strong>data_format</strong> (<em>string</em>) – The format of the input data. |
| It will default to ‘DATA_FORMAT_UNSPECIFIED’ if is not provided |
| or is not one of [“TEXT”, “TF_RECORD”, “TF_RECORD_GZIP”].</li> |
| <li><strong>input_paths</strong> (<em>list of string</em>) – A list of GCS paths of input data for batch |
| prediction. Accepting wildcard operator <a href="#id7"><span class="problematic" id="id8">*</span></a>, but only at the end. (templated)</li> |
| <li><strong>output_path</strong> (<em>string</em>) – The GCS path where the prediction results are |
| written to. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the |
| prediction job in. (templated)</li> |
| <li><strong>model_name</strong> (<em>string</em>) – The Google Cloud ML Engine model to use for prediction. |
| If version_name is not provided, the default version of this |
| model will be used. |
| Should not be None if version_name is provided. |
| Should be None if uri is provided. (templated)</li> |
| <li><strong>version_name</strong> (<em>string</em>) – The Google Cloud ML Engine model version to use for |
| prediction. |
| Should be None if uri is provided. (templated)</li> |
| <li><strong>uri</strong> (<em>string</em>) – The GCS path of the saved model to use for prediction. |
| Should be None if model_name is provided. |
| It should be a GCS path pointing to a tensorflow SavedModel. (templated)</li> |
| <li><strong>max_worker_count</strong> (<em>int</em>) – The maximum number of workers to be used |
| for parallel processing. Defaults to 10 if not specified.</li> |
| <li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML Engine runtime version to use |
| for batch prediction.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID used for connection to Google |
| Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Raises:</dt> |
| <dd><code class="docutils literal notranslate"><span class="pre">ValueError</span></code>: if a unique model/version origin cannot be determined.</dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for managing a Google Cloud ML Engine model.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine |
| model belongs. (templated)</li> |
| <li><strong>model</strong> (<em>dict</em>) – <p>A dictionary containing the information about the model. |
| If the <cite>operation</cite> is <cite>create</cite>, then the <cite>model</cite> parameter should |
| contain all the information about this model such as <cite>name</cite>.</p> |
| <p>If the <cite>operation</cite> is <cite>get</cite>, the <cite>model</cite> parameter |
| should contain the <cite>name</cite> of the model.</p> |
| </li> |
| <li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new model as provided by the <cite>model</cite> parameter.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets a particular model where the name is specified in <cite>model</cite>.</li> |
| </ul> |
| </li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineVersionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for managing a Google Cloud ML Engine version.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine |
| model belongs.</li> |
| <li><strong>model_name</strong> (<em>string</em>) – The name of the Google Cloud ML Engine model that the version |
| belongs to. (templated)</li> |
| <li><strong>version_name</strong> (<em>string</em>) – A name to use for the version being operated upon. |
| If not None and the <cite>version</cite> argument is None or does not have a value for |
| the <cite>name</cite> key, then this will be populated in the payload for the |
| <cite>name</cite> key. (templated)</li> |
| <li><strong>version</strong> (<em>dict</em>) – A dictionary containing the information about the version. |
| If the <cite>operation</cite> is <cite>create</cite>, <cite>version</cite> should contain all the |
| information about this version such as name, and deploymentUrl. |
| If the <cite>operation</cite> is <cite>get</cite> or <cite>delete</cite>, the <cite>version</cite> parameter |
| should contain the <cite>name</cite> of the version. |
| If it is None, the only <cite>operation</cite> possible would be <cite>list</cite>. (templated)</li> |
| <li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new version in the model specified by <cite>model_name</cite>, |
| in which case the <cite>version</cite> parameter should contain all the |
| information to create that version |
| (e.g. <cite>name</cite>, <cite>deploymentUrl</cite>).</li> |
| <li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets full information of a particular version in the model |
| specified by <cite>model_name</cite>. |
| The name of the version should be specified in the <cite>version</cite> |
| parameter.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">list</span></code>: Lists all available versions of the model specified |
| by <cite>model_name</cite>.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">delete</span></code>: Deletes the version specified in <cite>version</cite> parameter from the |
| model specified by <cite>model_name</cite>. |
| The name of the version should be specified in the <cite>version</cite> |
| parameter.</li> |
| </ul> |
| </li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for launching a MLEngine training job.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name within which MLEngine |
| training job should run (templated).</li> |
| <li><strong>job_id</strong> (<em>string</em>) – A unique templated id for the submitted Google MLEngine |
| training job. (templated)</li> |
| <li><strong>package_uris</strong> (<em>string</em>) – A list of package locations for MLEngine training job, |
| which should include the main training program + any additional |
| dependencies. (templated)</li> |
| <li><strong>training_python_module</strong> (<em>string</em>) – The Python module name to run within MLEngine |
| training job after installing ‘package_uris’ packages. (templated)</li> |
| <li><strong>training_args</strong> (<em>string</em>) – A list of templated command line arguments to pass to |
| the MLEngine training program. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the MLEngine training |
| job in (templated).</li> |
| <li><strong>scale_tier</strong> (<em>string</em>) – Resource tier for MLEngine training job. (templated)</li> |
| <li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML runtime version to use for |
| training. (templated)</li> |
| <li><strong>python_version</strong> (<em>string</em>) – The version of Python used in training. (templated)</li> |
| <li><strong>job_dir</strong> (<em>string</em>) – A Google Cloud Storage path in which to store training |
| outputs and other data needed for training. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>mode</strong> (<em>string</em>) – Can be one of ‘DRY_RUN’/’CLOUD’. In ‘DRY_RUN’ mode, no real |
| training job will be launched, but the MLEngine training job request |
| will be printed out. In ‘CLOUD’ mode, a real MLEngine training job |
| creation request will be issued.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mongo_to_s3.MongoToS3Operator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mongo_to_s3.</code><code class="descname">MongoToS3Operator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mongo_to_s3.html#MongoToS3Operator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mongo_to_s3.MongoToS3Operator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <dl class="docutils"> |
| <dt>Mongo -> S3</dt> |
| <dd><p class="first">A more specific baseOperator meant to move data |
| from mongo via pymongo to s3 via boto</p> |
| <dl class="last docutils"> |
| <dt>things to note</dt> |
| <dd>.execute() is written to depend on .transform() |
| .transform() is meant to be extended by child classes |
| to perform transformations unique to those operators’ needs</dd> |
| </dl> |
| </dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.mongo_to_s3.MongoToS3Operator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mongo_to_s3.html#MongoToS3Operator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mongo_to_s3.MongoToS3Operator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executed by task_instance at runtime</p> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.contrib.operators.mongo_to_s3.MongoToS3Operator.transform"> |
| <em class="property">static </em><code class="descname">transform</code><span class="sig-paren">(</span><em>docs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mongo_to_s3.html#MongoToS3Operator.transform"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mongo_to_s3.MongoToS3Operator.transform" title="Permalink to this definition">¶</a></dt> |
| <dd><dl class="docutils"> |
| <dt>Processes pyMongo cursor and returns an iterable with each element being</dt> |
| <dd>a JSON serializable dictionary</dd> |
| </dl> |
| <p>Base transform() assumes no processing is needed |
| i.e. docs is a pyMongo cursor of documents and cursor just |
| needs to be passed through</p> |
| <p>Override this method for custom transformations</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mysql_to_gcs.</code><code class="descname">MySqlToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mysql_to_gcs.html#MySqlToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copy data from MySQL to Google cloud storage in JSON format.</p> |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator.type_map"> |
| <em class="property">classmethod </em><code class="descname">type_map</code><span class="sig-paren">(</span><em>mysql_type</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mysql_to_gcs.html#MySqlToGoogleCloudStorageOperator.type_map"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator.type_map" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Helper function that maps from MySQL fields to BigQuery fields. Used |
| when a schema_filename is set.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.postgres_to_gcs_operator.PostgresToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.postgres_to_gcs_operator.</code><code class="descname">PostgresToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/postgres_to_gcs_operator.html#PostgresToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.postgres_to_gcs_operator.PostgresToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copy data from Postgres to Google Cloud Storage in JSON format.</p> |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.postgres_to_gcs_operator.PostgresToGoogleCloudStorageOperator.convert_types"> |
| <em class="property">classmethod </em><code class="descname">convert_types</code><span class="sig-paren">(</span><em>value</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/postgres_to_gcs_operator.html#PostgresToGoogleCloudStorageOperator.convert_types"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.postgres_to_gcs_operator.PostgresToGoogleCloudStorageOperator.convert_types" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Takes a value from Postgres, and converts it to a value that’s safe for |
| JSON/Google Cloud Storage/BigQuery. Dates are converted to UTC seconds. |
| Decimals are converted to floats. Times are converted to seconds.</p> |
| </dd></dl> |
| |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.postgres_to_gcs_operator.PostgresToGoogleCloudStorageOperator.type_map"> |
| <em class="property">classmethod </em><code class="descname">type_map</code><span class="sig-paren">(</span><em>postgres_type</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/postgres_to_gcs_operator.html#PostgresToGoogleCloudStorageOperator.type_map"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.postgres_to_gcs_operator.PostgresToGoogleCloudStorageOperator.type_map" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Helper function that maps from Postgres fields to BigQuery fields. Used |
| when a schema_filename is set.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.pubsub_operator.PubSubTopicCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.pubsub_operator.</code><code class="descname">PubSubTopicCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/pubsub_operator.html#PubSubTopicCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.pubsub_operator.PubSubTopicCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Create a PubSub topic.</p> |
| <p>By default, if the topic already exists, this operator will |
| not cause the DAG to fail.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'successful DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubTopicCreateOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">topic</span><span class="o">=</span><span class="s1">'my_new_topic'</span><span class="p">)</span> |
| <span class="o">>></span> <span class="n">PubSubTopicCreateOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">topic</span><span class="o">=</span><span class="s1">'my_new_topic'</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The operator can be configured to fail if the topic already exists.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'failing DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubTopicCreateOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">topic</span><span class="o">=</span><span class="s1">'my_new_topic'</span><span class="p">)</span> |
| <span class="o">>></span> <span class="n">PubSubTopicCreateOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">topic</span><span class="o">=</span><span class="s1">'my_new_topic'</span><span class="p">,</span> |
| <span class="n">fail_if_exists</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Both <code class="docutils literal notranslate"><span class="pre">project</span></code> and <code class="docutils literal notranslate"><span class="pre">topic</span></code> are templated so you can use |
| variables in them.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.pubsub_operator.PubSubTopicDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.pubsub_operator.</code><code class="descname">PubSubTopicDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/pubsub_operator.html#PubSubTopicDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.pubsub_operator.PubSubTopicDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Delete a PubSub topic.</p> |
| <p>By default, if the topic does not exist, this operator will |
| not cause the DAG to fail.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'successful DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubTopicDeleteOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">topic</span><span class="o">=</span><span class="s1">'non_existing_topic'</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The operator can be configured to fail if the topic does not exist.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'failing DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubTopicDeleteOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">topic</span><span class="o">=</span><span class="s1">'non_existing_topic'</span><span class="p">,</span> |
| <span class="n">fail_if_not_exists</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Both <code class="docutils literal notranslate"><span class="pre">project</span></code> and <code class="docutils literal notranslate"><span class="pre">topic</span></code> are templated so you can use |
| variables in them.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.pubsub_operator.PubSubSubscriptionCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.pubsub_operator.</code><code class="descname">PubSubSubscriptionCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/pubsub_operator.html#PubSubSubscriptionCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.pubsub_operator.PubSubSubscriptionCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Create a PubSub subscription.</p> |
| <p>By default, the subscription will be created in <code class="docutils literal notranslate"><span class="pre">topic_project</span></code>. If |
| <code class="docutils literal notranslate"><span class="pre">subscription_project</span></code> is specified and the GCP credentials allow, the |
| Subscription can be created in a different project from its topic.</p> |
| <p>By default, if the subscription already exists, this operator will |
| not cause the DAG to fail. However, the topic must exist in the project.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'successful DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubSubscriptionCreateOperator</span><span class="p">(</span> |
| <span class="n">topic_project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> <span class="n">topic</span><span class="o">=</span><span class="s1">'my-topic'</span><span class="p">,</span> |
| <span class="n">subscription</span><span class="o">=</span><span class="s1">'my-subscription'</span><span class="p">)</span> |
| <span class="o">>></span> <span class="n">PubSubSubscriptionCreateOperator</span><span class="p">(</span> |
| <span class="n">topic_project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> <span class="n">topic</span><span class="o">=</span><span class="s1">'my-topic'</span><span class="p">,</span> |
| <span class="n">subscription</span><span class="o">=</span><span class="s1">'my-subscription'</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The operator can be configured to fail if the subscription already exists.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'failing DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubSubscriptionCreateOperator</span><span class="p">(</span> |
| <span class="n">topic_project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> <span class="n">topic</span><span class="o">=</span><span class="s1">'my-topic'</span><span class="p">,</span> |
| <span class="n">subscription</span><span class="o">=</span><span class="s1">'my-subscription'</span><span class="p">)</span> |
| <span class="o">>></span> <span class="n">PubSubSubscriptionCreateOperator</span><span class="p">(</span> |
| <span class="n">topic_project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> <span class="n">topic</span><span class="o">=</span><span class="s1">'my-topic'</span><span class="p">,</span> |
| <span class="n">subscription</span><span class="o">=</span><span class="s1">'my-subscription'</span><span class="p">,</span> <span class="n">fail_if_exists</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Finally, subscription is not required. If not passed, the operator will |
| generate a universally unique identifier for the subscription’s name.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> <span class="o">>></span> <span class="n">PubSubSubscriptionCreateOperator</span><span class="p">(</span> |
| <span class="n">topic_project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> <span class="n">topic</span><span class="o">=</span><span class="s1">'my-topic'</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">topic_project</span></code>, <code class="docutils literal notranslate"><span class="pre">topic</span></code>, <code class="docutils literal notranslate"><span class="pre">subscription</span></code>, and |
| <code class="docutils literal notranslate"><span class="pre">subscription_project</span></code> are templated so you can use variables in them.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.pubsub_operator.PubSubSubscriptionDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.pubsub_operator.</code><code class="descname">PubSubSubscriptionDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/pubsub_operator.html#PubSubSubscriptionDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.pubsub_operator.PubSubSubscriptionDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Delete a PubSub subscription.</p> |
| <p>By default, if the subscription does not exist, this operator will |
| not cause the DAG to fail.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'successful DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubSubscriptionDeleteOperator</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">subscription</span><span class="o">=</span><span class="s1">'non-existing'</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The operator can be configured to fail if the subscription does not exist.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'failing DAG'</span><span class="p">)</span> <span class="k">as</span> <span class="n">dag</span><span class="p">:</span> |
| <span class="p">(</span> |
| <span class="n">dag</span> |
| <span class="o">>></span> <span class="n">PubSubSubscriptionDeleteOperator</span><span class="p">(</span> |
| <span class="n">project</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> <span class="n">subscription</span><span class="o">=</span><span class="s1">'non-existing'</span><span class="p">,</span> |
| <span class="n">fail_if_not_exists</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">project</span></code> and <code class="docutils literal notranslate"><span class="pre">subscription</span></code> are templated so you can use |
| variables in them.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.pubsub_operator.PubSubPublishOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.pubsub_operator.</code><code class="descname">PubSubPublishOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/pubsub_operator.html#PubSubPublishOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.pubsub_operator.PubSubPublishOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Publish messages to a PubSub topic.</p> |
| <p>Each Task publishes all provided messages to the same topic |
| in a single GCP project. If the topic does not exist, this |
| task will fail.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span> from base64 import b64encode as b64e |
| |
| m1 = {'data': b64e('Hello, World!'), |
| 'attributes': {'type': 'greeting'} |
| } |
| m2 = {'data': b64e('Knock, knock')} |
| m3 = {'attributes': {'foo': ''}} |
| |
| t1 = PubSubPublishOperator( |
| project='my-project',topic='my_topic', |
| messages=[m1, m2, m3], |
| create_topic=True, |
| dag=dag) |
| |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">project</span></code>, <code class="docutils literal notranslate"><span class="pre">topic</span></code>, and <code class="docutils literal notranslate"><span class="pre">messages</span></code> are templated so you can use |
| variables in them.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.qubole_check_operator.QuboleCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_check_operator.</code><code class="descname">QuboleCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_check_operator.html#QuboleCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_check_operator.QuboleCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a>, <a class="reference internal" href="integration.html#airflow.contrib.operators.qubole_operator.QuboleOperator" title="airflow.contrib.operators.qubole_operator.QuboleOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.qubole_operator.QuboleOperator</span></code></a></p> |
| <p>Performs checks against Qubole Commands. <code class="docutils literal notranslate"><span class="pre">QuboleCheckOperator</span></code> expects |
| a command that will be executed on QDS. |
| By default, each value on first row of the result of this Qubole Command |
| is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code>, the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could, |
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average.</p> |
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing it from |
| publishing dubious data, or on the side and receive email alerts |
| without stopping the progress of the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</td> |
| </tr> |
| </tbody> |
| </table> |
| <p>kwargs:</p> |
| <blockquote> |
| <div><p>Arguments specific to Qubole command can be referred from QuboleOperator docs.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name" colspan="2">results_parser_callable:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">This is an optional parameter to |
| extend the flexibility of parsing the results of Qubole |
| command to the users. This is a python callable which |
| can hold the logic to parse list of rows returned by Qubole command. |
| By default, only the values on first row are used for performing checks. |
| This callable should return a list of records on |
| which the checks have to be performed.</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">All fields in common with template fields of |
| QuboleOperator and CheckOperator are template-supported.</p> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.qubole_check_operator.QuboleValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_check_operator.</code><code class="descname">QuboleValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_check_operator.html#QuboleValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_check_operator.QuboleValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a>, <a class="reference internal" href="integration.html#airflow.contrib.operators.qubole_operator.QuboleOperator" title="airflow.contrib.operators.qubole_operator.QuboleOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.qubole_operator.QuboleOperator</span></code></a></p> |
| <p>Performs a simple value check using Qubole command. |
| By default, each value on the first row of this |
| Qubole command is compared with a pre-defined value. |
| The check fails and errors out if the output of the command |
| is not within the permissible limit of expected value.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li> |
| <li><strong>pass_value</strong> (<em>str/int/float</em>) – Expected value of the query results.</li> |
| <li><strong>tolerance</strong> (<em>int/float</em>) – Defines the permissible pass_value range, for example if |
| tolerance is 2, the Qubole command output can be anything between |
| -2*pass_value and 2*pass_value, without the operator erring out.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>kwargs:</p> |
| <blockquote> |
| <div><p>Arguments specific to Qubole command can be referred from QuboleOperator docs.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name" colspan="2">results_parser_callable:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">This is an optional parameter to |
| extend the flexibility of parsing the results of Qubole |
| command to the users. This is a python callable which |
| can hold the logic to parse list of rows returned by Qubole command. |
| By default, only the values on first row are used for performing checks. |
| This callable should return a list of records on |
| which the checks have to be performed.</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">All fields in common with template fields of |
| QuboleOperator and ValueCheckOperator are template-supported.</p> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.qubole_operator.QuboleOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_operator.</code><code class="descname">QuboleOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_operator.html#QuboleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute tasks (commands) on QDS (<a class="reference external" href="https://qubole.com">https://qubole.com</a>).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>kwargs:</dt> |
| <dd><table class="first docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">command_type:</th><td class="field-body">type of command to be executed, e.g. hivecmd, shellcmd, hadoopcmd</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">tags:</th><td class="field-body">array of tags to be assigned with the command</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">cluster_label:</th><td class="field-body">cluster label on which the command will be executed</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">name:</th><td class="field-body">name to be given to command</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">notify:</th><td class="field-body">whether to send email on command completion or not (default is False)</td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Arguments specific to command types</strong></p> |
| <dl class="last docutils"> |
| <dt>hivecmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">query:</th><td class="field-body">inline query statement</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">sample_size:</th><td class="field-body">size of sample in bytes on which to run query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>prestocmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">query:</th><td class="field-body">inline query statement</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>hadoopcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">sub_command:</th><td class="field-body">must be one of these [“jar”, “s3distcp”, “streaming”] followed by |
| 1 or more args</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>shellcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">script:</th><td class="field-body">inline command with args</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">files:</th><td class="field-body">list of files in s3 bucket as file1,file2 format. These files will be |
| copied into the working directory where the qubole command is being |
| executed.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">archives:</th><td class="field-body">list of archives in s3 bucket as archive1,archive2 format. These |
| will be unarchived into the working directory where the qubole command is |
| being executed</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">parameters:</th><td class="field-body">any extra args which need to be passed to script (only when |
| script_location is supplied)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>pigcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">script:</th><td class="field-body">inline query statement (latin_statements)</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing pig query</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">parameters:</th><td class="field-body">any extra args which need to be passed to script (only when |
| script_location is supplied)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>sparkcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">program:</th><td class="field-body">the complete Spark Program in Scala, SQL, Command, R, or Python</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">cmdline:</th><td class="field-body">spark-submit command line, all required information must be specified |
| in cmdline itself.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">sql:</th><td class="field-body">inline sql query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">language:</th><td class="field-body">language of the program, Scala, SQL, Command, R, or Python</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">app_id:</th><td class="field-body">ID of a Spark job server app</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">arguments:</th><td class="field-body">spark-submit command line arguments</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">user_program_arguments:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">arguments that the user program takes in</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>dbtapquerycmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">db_tap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">query:</th><td class="field-body">inline query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>dbexportcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">mode:</th><td class="field-body">1 (simple), 2 (advanced)</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">hive_table:</th><td class="field-body">Name of the hive table</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">partition_spec:</th><td class="field-body">partition specification for Hive table.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">dbtap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">db_table:</th><td class="field-body">name of the db table</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">db_update_mode:</th><td class="field-body">allowinsert or updateonly</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">db_update_keys:</th><td class="field-body">columns used to determine the uniqueness of rows</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">export_dir:</th><td class="field-body">HDFS/S3 location from which data will be exported.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">fields_terminated_by:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">hex of the char used as column separator in the dataset</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>dbimportcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">mode:</th><td class="field-body">1 (simple), 2 (advanced)</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">hive_table:</th><td class="field-body">Name of the hive table</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">dbtap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">db_table:</th><td class="field-body">name of the db table</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">where_clause:</th><td class="field-body">where clause, if any</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">parallelism:</th><td class="field-body">number of parallel db connections to use for extracting data</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">extract_query:</th><td class="field-body">SQL query to extract data from db. $CONDITIONS must be part |
| of the where clause.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">boundary_query:</th><td class="field-body">Query to be used to get the range of row IDs to be extracted</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">split_column:</th><td class="field-body">Column used as row ID to split data into ranges (mode 2)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p>Following fields are template-supported : <code class="docutils literal notranslate"><span class="pre">query</span></code>, <code class="docutils literal notranslate"><span class="pre">script_location</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">sub_command</span></code>, <code class="docutils literal notranslate"><span class="pre">script</span></code>, <code class="docutils literal notranslate"><span class="pre">files</span></code>, <code class="docutils literal notranslate"><span class="pre">archives</span></code>, <code class="docutils literal notranslate"><span class="pre">program</span></code>, <code class="docutils literal notranslate"><span class="pre">cmdline</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">sql</span></code>, <code class="docutils literal notranslate"><span class="pre">where_clause</span></code>, <code class="docutils literal notranslate"><span class="pre">extract_query</span></code>, <code class="docutils literal notranslate"><span class="pre">boundary_query</span></code>, <code class="docutils literal notranslate"><span class="pre">macros</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">tags</span></code>, <code class="docutils literal notranslate"><span class="pre">name</span></code>, <code class="docutils literal notranslate"><span class="pre">parameters</span></code>, <code class="docutils literal notranslate"><span class="pre">dbtap_id</span></code>, <code class="docutils literal notranslate"><span class="pre">hive_table</span></code>, <code class="docutils literal notranslate"><span class="pre">db_table</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">split_column</span></code>, <code class="docutils literal notranslate"><span class="pre">note_id</span></code>, <code class="docutils literal notranslate"><span class="pre">db_update_keys</span></code>, <code class="docutils literal notranslate"><span class="pre">export_dir</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">partition_spec</span></code>, <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code>, <code class="docutils literal notranslate"><span class="pre">arguments</span></code>, <code class="docutils literal notranslate"><span class="pre">user_program_arguments</span></code>.</p> |
| <blockquote class="last"> |
| <div>You can also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template driven use cases.</div></blockquote> |
| </div> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">In QuboleOperator there is a default handler for task failures and retries, |
| which generally kills the command running at QDS for the corresponding task |
| instance. You can override this behavior by providing your own failure and retry |
| handler in task definition.</p> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_list_operator.</code><code class="descname">S3ListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all objects from the bucket with the given string prefix in name.</p> |
| <p>This operator returns a python list with the name of objects which can be |
| used by <cite>xcom</cite> in the downstream task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string to filter the objects whose name begins with |
| such prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The connection ID to use when connecting to S3 storage.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Param verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following operator would list all the files |
| (excluding subfolders) from the S3 |
| <code class="docutils literal notranslate"><span class="pre">customers/2018/04/</span></code> key in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">s3_file</span> <span class="o">=</span> <span class="n">S3ListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'list_s3_files'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'customers/2018/04/'</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="s1">'/'</span><span class="p">,</span> |
| <span class="n">aws_conn_id</span><span class="o">=</span><span class="s1">'aws_customers_conn'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_to_gcs_operator.</code><code class="descname">S3ToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="airflow.contrib.operators.s3_list_operator.S3ListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.s3_list_operator.S3ListOperator</span></code></a></p> |
| <p>Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage |
| destination path.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begins with |
| such prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The source S3 connection</li> |
| <li><strong>dest_gcs_conn_id</strong> (<em>string</em>) – The destination connection ID to use |
| when connecting to Google Cloud Storage.</li> |
| <li><strong>dest_gcs</strong> (<em>string</em>) – The destination Google Cloud Storage bucket and prefix |
| where you want to store the files. (templated)</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Whether you want to replace existing destination files |
| or not.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Param verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">s3_to_gcs_op</span> <span class="o">=</span> <span class="n">S3ToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'s3_to_gcs_example'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'my-s3-bucket'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'data/customers-201804'</span><span class="p">,</span> |
| <span class="n">dest_gcs_conn_id</span><span class="o">=</span><span class="s1">'google_cloud_default'</span><span class="p">,</span> |
| <span class="n">dest_gcs</span><span class="o">=</span><span class="s1">'gs://my.gcs.bucket/some/customers/'</span><span class="p">,</span> |
| <span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Note that <code class="docutils literal notranslate"><span class="pre">bucket</span></code>, <code class="docutils literal notranslate"><span class="pre">prefix</span></code>, <code class="docutils literal notranslate"><span class="pre">delimiter</span></code> and <code class="docutils literal notranslate"><span class="pre">dest_gcs</span></code> are |
| templated, so you can use variables in them if you wish.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_to_gcs_transfer_operator.S3ToGoogleCloudStorageTransferOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_to_gcs_transfer_operator.</code><code class="descname">S3ToGoogleCloudStorageTransferOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_transfer_operator.html#S3ToGoogleCloudStorageTransferOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_transfer_operator.S3ToGoogleCloudStorageTransferOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Synchronizes an S3 bucket with a Google Cloud Storage bucket using the |
| GCP Storage Transfer Service.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>s3_bucket</strong> (<em>str</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>gcs_bucket</strong> (<em>str</em>) – The destination Google Cloud Storage bucket |
| where you want to store the files. (templated)</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional ID of the Google Cloud Platform Console project that |
| owns the job</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The source S3 connection</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The destination connection ID to use |
| when connecting to Google Cloud Storage.</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>description</strong> (<em>str</em>) – Optional transfer service job description</li> |
| <li><strong>schedule</strong> (<em>dict</em>) – Optional transfer service schedule; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs</a>. |
| If not set, run transfer job once as soon as the operator runs</li> |
| <li><strong>object_conditions</strong> (<em>dict</em>) – Optional transfer service object conditions; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec</a></li> |
| <li><strong>transfer_options</strong> (<em>dict</em>) – Optional transfer service transfer options; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec</a></li> |
| <li><strong>wait</strong> (<em>bool</em>) – Wait for transfer to finish</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">s3_to_gcs_transfer_op</span> <span class="o">=</span> <span class="n">S3ToGoogleCloudStorageTransferOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'s3_to_gcs_transfer_example'</span><span class="p">,</span> |
| <span class="n">s3_bucket</span><span class="o">=</span><span class="s1">'my-s3-bucket'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="n">gcs_bucket</span><span class="o">=</span><span class="s1">'my-gcs-bucket'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_base_operator.</code><code class="descname">SageMakerBaseOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_base_operator.html#SageMakerBaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>This is the base operator for all SageMaker operators.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – The configuration necessary to start a training job (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_endpoint_operator.SageMakerEndpointOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_operator.</code><code class="descname">SageMakerEndpointOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_operator.html#SageMakerEndpointOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_endpoint_operator.SageMakerEndpointOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker endpoint.</p> |
| <p>This operator returns The ARN of the endpoint created in Amazon SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint.</p> |
| <p>If you need to create a SageMaker endpoint based on an existing |
| SageMaker model and an existing SageMaker endpoint config:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">endpoint_configuration</span><span class="p">;</span> |
| </pre></div> |
| </div> |
| <p>If you need to create all of SageMaker model, SageMaker endpoint-config and SageMaker endpoint:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'Model'</span><span class="p">:</span> <span class="n">model_configuration</span><span class="p">,</span> |
| <span class="s1">'EndpointConfig'</span><span class="p">:</span> <span class="n">endpoint_config_configuration</span><span class="p">,</span> |
| <span class="s1">'Endpoint'</span><span class="p">:</span> <span class="n">endpoint_configuration</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>For details of the configuration parameter of model_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| <p>For details of the configuration parameter of endpoint_config_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p> |
| <p>For details of the configuration parameter of endpoint_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the endpoint creation finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, this is the time interval, in seconds, that this operation |
| waits before polling the status of the endpoint creation.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, this operation fails if the endpoint creation doesn’t |
| finish within max_ingestion_time seconds. If you set this parameter to None it never times out.</li> |
| <li><strong>operation</strong> (<em>str</em>) – Whether to create an endpoint or update an endpoint. Must be either ‘create’ or ‘update’.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_endpoint_config_operator.SageMakerEndpointConfigOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_config_operator.</code><code class="descname">SageMakerEndpointConfigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_config_operator.html#SageMakerEndpointConfigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_endpoint_config_operator.SageMakerEndpointConfigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker endpoint config.</p> |
| <p>This operator returns The ARN of the endpoint config created in Amazon SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint config.</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_model_operator.SageMakerModelOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_model_operator.</code><code class="descname">SageMakerModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_model_operator.html#SageMakerModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_model_operator.SageMakerModelOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker model.</p> |
| <p>This operator returns The ARN of the model created in Amazon SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create a model.</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_training_operator.SageMakerTrainingOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_training_operator.</code><code class="descname">SageMakerTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_training_operator.html#SageMakerTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_training_operator.SageMakerTrainingOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker training job.</p> |
| <p>This operator returns The ARN of the training job created in Amazon SageMaker.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a training job (templated).</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_training_job()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the training job finishes.</li> |
| <li><strong>print_log</strong> (<em>bool</em>) – if the operator should print the cloudwatch log during training</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that the operator waits to check the status of the training job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails if the training job |
| doesn’t finish within max_ingestion_time seconds. If you set this parameter to None, |
| the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_transform_operator.SageMakerTransformOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_transform_operator.</code><code class="descname">SageMakerTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_transform_operator.html#SageMakerTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_transform_operator.SageMakerTransformOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker transform job.</p> |
| <p>This operator returns the ARN of the model created in Amazon SageMaker.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a transform job (templated).</p> |
| <p>If you need to create a SageMaker transform job based on an existing SageMaker model:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">transform_config</span> |
| </pre></div> |
| </div> |
| <p>If you need to create both SageMaker model and SageMaker Transform job:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'Model'</span><span class="p">:</span> <span class="n">model_config</span><span class="p">,</span> |
| <span class="s1">'Transform'</span><span class="p">:</span> <span class="n">transform_config</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>For details of the configuration parameter of transform_config see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_transform_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_transform_job()</span></code></a></p> |
| <p>For details of the configuration parameter of model_config, See: |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the transform job finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that this operation waits to check the status of the transform job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails |
| if the transform job doesn’t finish within max_ingestion_time seconds. If you |
| set this parameter to None, the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_tuning_operator.SageMakerTuningOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_tuning_operator.</code><code class="descname">SageMakerTuningOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_tuning_operator.html#SageMakerTuningOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_tuning_operator.SageMakerTuningOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker hyperparameter tuning job.</p> |
| <p>This operator returns the ARN of the tuning job created in Amazon SageMaker.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a tuning job (templated).</p> |
| <p>For details of the configuration parameter see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_hyper_parameter_tuning_job()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the tuning job finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that this operation waits to check the status of the tuning job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails |
| if the tuning job doesn’t finish within max_ingestion_time seconds. If you |
| set this parameter to None, the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sftp_operator.SFTPOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sftp_operator.</code><code class="descname">SFTPOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sftp_operator.html#SFTPOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sftp_operator.SFTPOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>SFTPOperator for transferring files from remote host to local or vice versa. |
| This operator uses ssh_hook to open sftp transport channel that serve as basis |
| for file transfer.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>ssh_hook</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">SSHHook</span></code>) – predefined ssh_hook to use for remote execution. |
| Either <cite>ssh_hook</cite> or <cite>ssh_conn_id</cite> needs to be provided.</li> |
| <li><strong>ssh_conn_id</strong> (<em>str</em>) – connection id from airflow Connections. |
| <cite>ssh_conn_id</cite> will be ignored if <cite>ssh_hook</cite> is provided.</li> |
| <li><strong>remote_host</strong> (<em>str</em>) – remote host to connect (templated) |
| Nullable. If provided, it will replace the <cite>remote_host</cite> which was |
| defined in <cite>ssh_hook</cite> or predefined in the connection of <cite>ssh_conn_id</cite>.</li> |
| <li><strong>local_filepath</strong> (<em>str</em>) – local file path to get or put. (templated)</li> |
| <li><strong>remote_filepath</strong> (<em>str</em>) – remote file path to get or put. (templated)</li> |
| <li><strong>operation</strong> (<em>str</em>) – specify operation ‘get’ or ‘put’, defaults to put</li> |
| <li><strong>confirm</strong> (<em>bool</em>) – specify if the SFTP operation should be confirmed, defaults to True</li> |
| <li><strong>create_intermediate_dirs</strong> (<em>bool</em>) – <p>create missing intermediate directories when |
| copying from remote to local and vice-versa. Default is False.</p> |
| <p>Example: The following task would copy <code class="docutils literal notranslate"><span class="pre">file.txt</span></code> to the remote host |
| at <code class="docutils literal notranslate"><span class="pre">/tmp/tmp1/tmp2/</span></code> while creating <code class="docutils literal notranslate"><span class="pre">tmp</span></code>, <code class="docutils literal notranslate"><span class="pre">tmp1</span></code> and <code class="docutils literal notranslate"><span class="pre">tmp2</span></code> if they |
| don’t exist. If the parameter is not passed it would error as the directory |
| does not exist.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">put_file</span> <span class="o">=</span> <span class="n">SFTPOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s2">"test_sftp"</span><span class="p">,</span> |
| <span class="n">ssh_conn_id</span><span class="o">=</span><span class="s2">"ssh_default"</span><span class="p">,</span> |
| <span class="n">local_filepath</span><span class="o">=</span><span class="s2">"/tmp/file.txt"</span><span class="p">,</span> |
| <span class="n">remote_filepath</span><span class="o">=</span><span class="s2">"/tmp/tmp1/tmp2/file.txt"</span><span class="p">,</span> |
| <span class="n">operation</span><span class="o">=</span><span class="s2">"put"</span><span class="p">,</span> |
| <span class="n">create_intermediate_dirs</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.slack_webhook_operator.SlackWebhookOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.slack_webhook_operator.</code><code class="descname">SlackWebhookOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/slack_webhook_operator.html#SlackWebhookOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.slack_webhook_operator.SlackWebhookOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.operators.http_operator.SimpleHttpOperator" title="airflow.operators.http_operator.SimpleHttpOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.http_operator.SimpleHttpOperator</span></code></a></p> |
| <p>This operator allows you to post messages to Slack using incoming webhooks. |
| Takes both Slack webhook token directly and connection that has Slack webhook token. |
| If both are supplied, the Slack webhook token will be used.</p> |
| <p>Each Slack webhook token can be pre-configured to use a specific channel, username and |
| icon. You can override these defaults in this operator.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>http_conn_id</strong> (<em>str</em>) – connection that has Slack webhook token in the extra field</li> |
| <li><strong>webhook_token</strong> (<em>str</em>) – Slack webhook token</li> |
| <li><strong>message</strong> (<em>str</em>) – The message you want to send on Slack</li> |
| <li><strong>channel</strong> (<em>str</em>) – The channel the message should be posted to</li> |
| <li><strong>username</strong> (<em>str</em>) – The username to post to slack with</li> |
| <li><strong>icon_emoji</strong> (<em>str</em>) – The emoji to use as icon for the user posting to Slack</li> |
| <li><strong>link_names</strong> (<em>bool</em>) – Whether or not to find and link channel and usernames in your |
| message</li> |
| <li><strong>proxy</strong> (<em>str</em>) – Proxy to use to make the Slack webhook call</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.slack_webhook_operator.SlackWebhookOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/slack_webhook_operator.html#SlackWebhookOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.slack_webhook_operator.SlackWebhookOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Call the SlackWebhookHook to post the provided Slack message</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.snowflake_operator.SnowflakeOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.snowflake_operator.</code><code class="descname">SnowflakeOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/snowflake_operator.html#SnowflakeOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.snowflake_operator.SnowflakeOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes sql code in a Snowflake database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>snowflake_conn_id</strong> (<em>string</em>) – reference to specific snowflake connection id</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'</em>) – the sql code to be executed. (templated)</li> |
| <li><strong>warehouse</strong> (<em>string</em>) – name of the warehouse which overwrites the |
| one defined in the connection</li> |
| <li><strong>database</strong> (<em>string</em>) – name of the database which overwrites the one defined in the connection</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.spark_jdbc_operator.SparkJDBCOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.spark_jdbc_operator.</code><code class="descname">SparkJDBCOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/spark_jdbc_operator.html#SparkJDBCOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.spark_jdbc_operator.SparkJDBCOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator" title="airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator</span></code></a></p> |
| <p>This operator extends the SparkSubmitOperator specifically for performing data |
| transfers to/from JDBC-based databases with Apache Spark. As with the |
| SparkSubmitOperator, it assumes that the “spark-submit” binary is available on the |
| PATH.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>spark_app_name</strong> (<em>str</em>) – Name of the job (default airflow-spark-jdbc)</li> |
| <li><strong>spark_conn_id</strong> (<em>str</em>) – Connection id as configured in Airflow administration</li> |
| <li><strong>spark_conf</strong> (<em>dict</em>) – Any additional Spark configuration properties</li> |
| <li><strong>spark_py_files</strong> (<em>str</em>) – Additional python files used (.zip, .egg, or .py)</li> |
| <li><strong>spark_files</strong> (<em>str</em>) – Additional files to upload to the container running the job</li> |
| <li><strong>spark_jars</strong> (<em>str</em>) – Additional jars to upload and add to the driver and |
| executor classpath</li> |
| <li><strong>num_executors</strong> (<em>int</em>) – number of executor to run. This should be set so as to manage |
| the number of connections made with the JDBC database</li> |
| <li><strong>executor_cores</strong> (<em>int</em>) – Number of cores per executor</li> |
| <li><strong>executor_memory</strong> (<em>str</em>) – Memory per executor (e.g. 1000M, 2G)</li> |
| <li><strong>driver_memory</strong> (<em>str</em>) – Memory allocated to the driver (e.g. 1000M, 2G)</li> |
| <li><strong>verbose</strong> (<em>bool</em>) – Whether to pass the verbose flag to spark-submit for debugging</li> |
| <li><strong>keytab</strong> (<em>str</em>) – Full path to the file that contains the keytab</li> |
| <li><strong>principal</strong> (<em>str</em>) – The name of the kerberos principal used for keytab</li> |
| <li><strong>cmd_type</strong> (<em>str</em>) – Which way the data should flow. 2 possible values: |
| spark_to_jdbc: data written by spark from metastore to jdbc |
| jdbc_to_spark: data written by spark from jdbc to metastore</li> |
| <li><strong>jdbc_table</strong> (<em>str</em>) – The name of the JDBC table</li> |
| <li><strong>jdbc_conn_id</strong> – Connection id used for connection to JDBC database</li> |
| <li><strong>jdbc_driver</strong> (<em>str</em>) – Name of the JDBC driver to use for the JDBC connection. This |
| driver (usually a jar) should be passed in the ‘jars’ parameter</li> |
| <li><strong>metastore_table</strong> (<em>str</em>) – The name of the metastore table.</li> |
| <li><strong>jdbc_truncate</strong> (<em>bool</em>) – (spark_to_jdbc only) Whether or not Spark should truncate or |
| drop and recreate the JDBC table. This only takes effect if |
| ‘save_mode’ is set to Overwrite. Also, if the schema is |
| different, Spark cannot truncate, and will drop and recreate</li> |
| <li><strong>save_mode</strong> (<em>str</em>) – The Spark save-mode to use (e.g. overwrite, append, etc.)</li> |
| <li><strong>save_format</strong> (<em>str</em>) – (jdbc_to_spark-only) The Spark save-format to use (e.g. parquet)</li> |
| <li><strong>batch_size</strong> (<em>int</em>) – (spark_to_jdbc only) The size of the batch to insert per round |
| trip to the JDBC database. Defaults to 1000</li> |
| <li><strong>fetch_size</strong> (<em>int</em>) – (jdbc_to_spark only) The size of the batch to fetch per round trip |
| from the JDBC database. Default depends on the JDBC driver</li> |
| <li><strong>num_partitions</strong> (<em>int</em>) – The maximum number of partitions that can be used by Spark |
| simultaneously, both for spark_to_jdbc and jdbc_to_spark |
| operations. This will also cap the number of JDBC connections |
| that can be opened</li> |
| <li><strong>partition_column</strong> (<em>str</em>) – (jdbc_to_spark-only) A numeric column to be used to |
| partition the metastore table by. If specified, you must |
| also specify: |
| num_partitions, lower_bound, upper_bound</li> |
| <li><strong>lower_bound</strong> (<em>int</em>) – (jdbc_to_spark-only) Lower bound of the range of the numeric |
| partition column to fetch. If specified, you must also specify: |
| num_partitions, partition_column, upper_bound</li> |
| <li><strong>upper_bound</strong> (<em>int</em>) – (jdbc_to_spark-only) Upper bound of the range of the numeric |
| partition column to fetch. If specified, you must also specify: |
| num_partitions, partition_column, lower_bound</li> |
| <li><strong>create_table_column_types</strong> – (spark_to_jdbc-only) The database column data types |
| to use instead of the defaults, when creating the |
| table. Data type information should be specified in |
| the same format as CREATE TABLE columns syntax |
| (e.g: “name CHAR(64), comments VARCHAR(1024)”). |
| The specified types should be valid spark sql data |
| types.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">jdbc_conn_id: str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.spark_jdbc_operator.SparkJDBCOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/spark_jdbc_operator.html#SparkJDBCOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.spark_jdbc_operator.SparkJDBCOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Call the SparkSubmitHook to run the provided spark job</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.spark_sql_operator.SparkSqlOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.spark_sql_operator.</code><code class="descname">SparkSqlOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/spark_sql_operator.html#SparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.spark_sql_operator.SparkSqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute Spark SQL query</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em>) – The SQL query to execute. (templated)</li> |
| <li><strong>conf</strong> (<em>str</em><em> (</em><em>format: PROP=VALUE</em><em>)</em>) – arbitrary Spark configuration property</li> |
| <li><strong>conn_id</strong> (<em>str</em>) – connection_id string</li> |
| <li><strong>total_executor_cores</strong> (<em>int</em>) – (Standalone & Mesos only) Total cores for all |
| executors (Default: all the available cores on the worker)</li> |
| <li><strong>executor_cores</strong> (<em>int</em>) – (Standalone & YARN only) Number of cores per |
| executor (Default: 2)</li> |
| <li><strong>executor_memory</strong> (<em>str</em>) – Memory per executor (e.g. 1000M, 2G) (Default: 1G)</li> |
| <li><strong>keytab</strong> (<em>str</em>) – Full path to the file that contains the keytab</li> |
| <li><strong>master</strong> (<em>str</em>) – spark://host:port, mesos://host:port, yarn, or local</li> |
| <li><strong>name</strong> (<em>str</em>) – Name of the job</li> |
| <li><strong>num_executors</strong> (<em>int</em>) – Number of executors to launch</li> |
| <li><strong>verbose</strong> (<em>bool</em>) – Whether to pass the verbose flag to spark-sql</li> |
| <li><strong>yarn_queue</strong> (<em>str</em>) – The YARN queue to submit to (Default: “default”)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.spark_sql_operator.SparkSqlOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/spark_sql_operator.html#SparkSqlOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.spark_sql_operator.SparkSqlOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Call the SparkSqlHook to run the provided sql query</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.spark_submit_operator.</code><code class="descname">SparkSubmitOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/spark_submit_operator.html#SparkSubmitOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>This operator is a wrapper around the spark-submit binary to kick off a spark-submit job. |
| It requires that the “spark-submit” binary is in the PATH or the spark-home is set |
| in the extra on the connection.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>application</strong> (<em>str</em>) – The application that submitted as a job, either jar or |
| py file. (templated)</li> |
| <li><strong>conf</strong> (<em>dict</em>) – Arbitrary Spark configuration properties</li> |
| <li><strong>conn_id</strong> (<em>str</em>) – The connection id as configured in Airflow administration. When an |
| invalid connection_id is supplied, it will default to yarn.</li> |
| <li><strong>files</strong> (<em>str</em>) – Upload additional files to the executor running the job, separated by a |
| comma. Files will be placed in the working directory of each executor. |
| For example, serialized objects.</li> |
| <li><strong>py_files</strong> (<em>str</em>) – Additional python files used by the job, can be .zip, .egg or .py.</li> |
| <li><strong>jars</strong> (<em>str</em>) – Submit additional jars to upload and place them in executor classpath.</li> |
| <li><strong>driver_classpath</strong> (<em>str</em>) – Additional, driver-specific, classpath settings.</li> |
| <li><strong>java_class</strong> (<em>str</em>) – the main class of the Java application</li> |
| <li><strong>packages</strong> (<em>str</em>) – Comma-separated list of maven coordinates of jars to include on the |
| driver and executor classpaths. (templated)</li> |
| <li><strong>exclude_packages</strong> (<em>str</em>) – Comma-separated list of maven coordinates of jars to exclude |
| while resolving the dependencies provided in ‘packages’</li> |
| <li><strong>repositories</strong> (<em>str</em>) – Comma-separated list of additional remote repositories to search |
| for the maven coordinates given with ‘packages’</li> |
| <li><strong>total_executor_cores</strong> (<em>int</em>) – (Standalone & Mesos only) Total cores for all executors |
| (Default: all the available cores on the worker)</li> |
| <li><strong>executor_cores</strong> (<em>int</em>) – (Standalone & YARN only) Number of cores per executor |
| (Default: 2)</li> |
| <li><strong>executor_memory</strong> (<em>str</em>) – Memory per executor (e.g. 1000M, 2G) (Default: 1G)</li> |
| <li><strong>driver_memory</strong> (<em>str</em>) – Memory allocated to the driver (e.g. 1000M, 2G) (Default: 1G)</li> |
| <li><strong>keytab</strong> (<em>str</em>) – Full path to the file that contains the keytab</li> |
| <li><strong>principal</strong> (<em>str</em>) – The name of the kerberos principal used for keytab</li> |
| <li><strong>name</strong> (<em>str</em>) – Name of the job (default airflow-spark). (templated)</li> |
| <li><strong>num_executors</strong> (<em>int</em>) – Number of executors to launch</li> |
| <li><strong>application_args</strong> (<em>list</em>) – Arguments for the application being submitted</li> |
| <li><strong>env_vars</strong> (<em>dict</em>) – Environment variables for spark-submit. It |
| supports yarn and k8s mode too.</li> |
| <li><strong>verbose</strong> (<em>bool</em>) – Whether to pass the verbose flag to spark-submit process for debugging</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/spark_submit_operator.html#SparkSubmitOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.spark_submit_operator.SparkSubmitOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Call the SparkSubmitHook to run the provided spark job</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sqoop_operator.SqoopOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sqoop_operator.</code><code class="descname">SqoopOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sqoop_operator.html#SqoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sqoop_operator.SqoopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a Sqoop job. |
| Documentation for Apache Sqoop can be found here:</p> |
| <blockquote> |
| <div><a class="reference external" href="https://sqoop.apache.org/docs/1.4.2/SqoopUserGuide.html">https://sqoop.apache.org/docs/1.4.2/SqoopUserGuide.html</a>.</div></blockquote> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.sqoop_operator.SqoopOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sqoop_operator.html#SqoopOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sqoop_operator.SqoopOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute sqoop job</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.ssh_operator.SSHOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.ssh_operator.</code><code class="descname">SSHOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ssh_operator.html#SSHOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ssh_operator.SSHOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>SSHOperator to execute commands on given remote host using the ssh_hook.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>ssh_hook</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">SSHHook</span></code>) – predefined ssh_hook to use for remote execution. |
| Either <cite>ssh_hook</cite> or <cite>ssh_conn_id</cite> needs to be provided.</li> |
<li><strong>ssh_conn_id</strong> (<em>str</em>) – connection id from airflow Connections.
<cite>ssh_conn_id</cite> will be ignored if <cite>ssh_hook</cite> is provided.</li>
| <li><strong>remote_host</strong> (<em>str</em>) – remote host to connect (templated) |
| Nullable. If provided, it will replace the <cite>remote_host</cite> which was |
| defined in <cite>ssh_hook</cite> or predefined in the connection of <cite>ssh_conn_id</cite>.</li> |
| <li><strong>command</strong> (<em>str</em>) – command to execute on remote host. (templated)</li> |
| <li><strong>timeout</strong> (<em>int</em>) – timeout (in seconds) for executing the command.</li> |
<li><strong>do_xcom_push</strong> (<em>bool</em>) – return the stdout which also gets set in xcom by airflow platform</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.vertica_operator.VerticaOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.vertica_operator.</code><code class="descname">VerticaOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/vertica_operator.html#VerticaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.vertica_operator.VerticaOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes sql code in a specific Vertica database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>vertica_conn_id</strong> (<em>string</em>) – reference to a specific Vertica database</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'</em>) – the sql code to be executed. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.vertica_to_hive.VerticaToHiveTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.vertica_to_hive.</code><code class="descname">VerticaToHiveTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/vertica_to_hive.html#VerticaToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.vertica_to_hive.VerticaToHiveTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from Vertia to Hive. The operator runs |
| your query against Vertia, stores the file locally |
| before loading it into a Hive table. If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or |
| <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, |
| a <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated. |
| Hive data types are inferred from the cursor’s metadata. |
| Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>sql</strong> (<em>str</em>) – SQL query to execute against the Vertica database. (templated)</li>
| <li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every execution</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns |
| and values. (templated)</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>vertica_conn_id</strong> (<em>str</em>) – source Vertica connection</li> |
| <li><strong>hive_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.winrm_operator.WinRMOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.winrm_operator.</code><code class="descname">WinRMOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/winrm_operator.html#WinRMOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.winrm_operator.WinRMOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>WinRMOperator to execute commands on given remote host using the winrm_hook.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>winrm_hook</strong> (<code class="xref py py-class docutils literal notranslate"><span class="pre">WinRMHook</span></code>) – predefined winrm_hook to use for remote execution</li>
| <li><strong>ssh_conn_id</strong> (<em>str</em>) – connection id from airflow Connections</li> |
| <li><strong>remote_host</strong> (<em>str</em>) – remote host to connect</li> |
| <li><strong>command</strong> (<em>str</em>) – command to execute on remote host. (templated)</li> |
| <li><strong>timeout</strong> (<em>int</em>) – timeout for executing the command.</li> |
<li><strong>do_xcom_push</strong> (<em>bool</em>) – return the stdout which also gets set in xcom by airflow platform</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id9"> |
| <h4>Sensors<a class="headerlink" href="#id9" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.aws_athena_sensor.AthenaSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_athena_sensor.</code><code class="descname">AthenaSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_athena_sensor.html#AthenaSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_athena_sensor.AthenaSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Asks for the state of the Query until it reaches a failure state or success state. |
| If it fails, failing the task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query_execution_id</strong> (<em>str</em>) – query_execution_id to check the state of</li> |
| <li><strong>max_retires</strong> (<em>int</em>) – Number of times to poll for query state before |
| returning the current state, defaults to None</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use, defaults to ‘aws_default’</li> |
<li><strong>sleep_time</strong> (<em>int</em>) – Time to wait between two consecutive calls to
check query status on athena, defaults to 10</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.aws_athena_sensor.AthenaSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_athena_sensor.html#AthenaSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_athena_sensor.AthenaSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.aws_glue_catalog_partition_sensor.AwsGlueCatalogPartitionSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_glue_catalog_partition_sensor.</code><code class="descname">AwsGlueCatalogPartitionSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_glue_catalog_partition_sensor.html#AwsGlueCatalogPartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_glue_catalog_partition_sensor.AwsGlueCatalogPartitionSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a partition to show up in AWS Glue Catalog.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table_name</strong> (<em>str</em>) – The name of the table to wait for, supports the dot |
| notation (my_database.my_table)</li> |
| <li><strong>expression</strong> (<em>str</em>) – The partition clause to wait for. This is passed as |
| is to the AWS Glue Catalog API’s get_partitions function, |
| and supports SQL like notation as in <code class="docutils literal notranslate"><span class="pre">ds='2015-01-01'</span> |
| <span class="pre">AND</span> <span class="pre">type='value'</span></code> and comparison operators as in <code class="docutils literal notranslate"><span class="pre">"ds>=2015-01-01"</span></code>. |
| See <a class="reference external" href="https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html">https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html</a> |
| #aws-glue-api-catalog-partitions-GetPartitions</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – ID of the Airflow connection where |
| credentials and extra configuration are stored</li> |
| <li><strong>region_name</strong> (<em>str</em>) – Optional aws region name (example: us-east-1). Uses region from connection |
| if not specified.</li> |
| <li><strong>database_name</strong> (<em>str</em>) – The name of the catalog database where the partitions reside.</li> |
<li><strong>poke_interval</strong> (<em>int</em>) – Time in seconds that the job should wait in
between each try</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.aws_glue_catalog_partition_sensor.AwsGlueCatalogPartitionSensor.get_hook"> |
| <code class="descname">get_hook</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_glue_catalog_partition_sensor.html#AwsGlueCatalogPartitionSensor.get_hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_glue_catalog_partition_sensor.AwsGlueCatalogPartitionSensor.get_hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the AwsGlueCatalogHook</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.aws_glue_catalog_partition_sensor.AwsGlueCatalogPartitionSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_glue_catalog_partition_sensor.html#AwsGlueCatalogPartitionSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_glue_catalog_partition_sensor.AwsGlueCatalogPartitionSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for existence of the partition in the AWS Glue Catalog table</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_redshift_cluster_sensor.</code><code class="descname">AwsRedshiftClusterSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a Redshift cluster to reach a specific status.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – The identifier for the cluster being pinged.</li> |
| <li><strong>target_status</strong> (<em>str</em>) – The cluster status desired.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.azure_cosmos_sensor.</code><code class="descname">AzureCosmosDocumentSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/azure_cosmos_sensor.html#AzureCosmosDocumentSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a document which |
| matches the given query in CosmosDB. Example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">azure_cosmos_sensor</span> <span class="o">=</span> <span class="n">AzureCosmosDocumentSensor</span><span class="p">(</span><span class="n">database_name</span><span class="o">=</span><span class="s2">"somedatabase_name"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">collection_name</span><span class="o">=</span><span class="s2">"somecollection_name"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">document_id</span><span class="o">=</span><span class="s2">"unique-doc-id"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">azure_cosmos_conn_id</span><span class="o">=</span><span class="s2">"azure_cosmos_default"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">task_id</span><span class="o">=</span><span class="s2">"azure_cosmos_sensor"</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/azure_cosmos_sensor.html#AzureCosmosDocumentSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.bash_sensor.BashSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.bash_sensor.</code><code class="descname">BashSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/bash_sensor.html#BashSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.bash_sensor.BashSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Executes a bash command/script and returns True if and only if the |
| return code is 0.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bash_command</strong> (<em>string</em>) – The command, set of commands or reference to a |
| bash script (must be ‘.sh’) to be executed.</li> |
| <li><strong>env</strong> (<em>dict</em>) – If env is not None, it must be a mapping that defines the |
| environment variables for the new process; these are used instead |
| of inheriting the current process environment, which is the default |
| behavior. (templated)</li> |
| <li><strong>output_encoding</strong> (<em>string</em>) – output encoding of bash command.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.bash_sensor.BashSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/bash_sensor.html#BashSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.bash_sensor.BashSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the bash command in a temporary directory |
| which will be cleaned afterwards</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.bigquery_sensor.BigQueryTableSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.bigquery_sensor.</code><code class="descname">BigQueryTableSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/bigquery_sensor.html#BigQueryTableSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.bigquery_sensor.BigQueryTableSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a table in Google Bigquery.</p> |
| <blockquote> |
| <div><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param project_id:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The Google cloud project in which to look for the table. |
| The connection supplied to the hook must provide |
| access to the specified project.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type project_id:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param dataset_id:</th></tr> |
<tr class="field-odd field"><td> </td><td class="field-body">The name of the dataset in which to look for the table.</td>
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type dataset_id:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">param table_id:</th><td class="field-body">The name of the table to check the existence of.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type table_id:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param bigquery_conn_id:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The connection ID to use when connecting to |
| Google BigQuery.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type bigquery_conn_id:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param delegate_to:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type delegate_to:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.bigquery_sensor.BigQueryTableSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/bigquery_sensor.html#BigQueryTableSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.bigquery_sensor.BigQueryTableSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.cassandra_record_sensor.CassandraRecordSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.cassandra_record_sensor.</code><code class="descname">CassandraRecordSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/cassandra_record_sensor.html#CassandraRecordSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.cassandra_record_sensor.CassandraRecordSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a record in a Cassandra cluster.</p> |
<p>For example, if you want to wait for a record that has values ‘v1’ and ‘v2’ for the
primary keys ‘p1’ and ‘p2’ to be populated in keyspace ‘k’ and table ‘t’,
| instantiate it as follows:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">cassandra_sensor</span> <span class="o">=</span> <span class="n">CassandraRecordSensor</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s2">"k.t"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">keys</span><span class="o">=</span><span class="p">{</span><span class="s2">"p1"</span><span class="p">:</span> <span class="s2">"v1"</span><span class="p">,</span> <span class="s2">"p2"</span><span class="p">:</span> <span class="s2">"v2"</span><span class="p">},</span> |
| <span class="gp">... </span> <span class="n">cassandra_conn_id</span><span class="o">=</span><span class="s2">"cassandra_default"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">task_id</span><span class="o">=</span><span class="s2">"cassandra_sensor"</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.cassandra_record_sensor.CassandraRecordSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/cassandra_record_sensor.html#CassandraRecordSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.cassandra_record_sensor.CassandraRecordSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.cassandra_table_sensor.CassandraTableSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.cassandra_table_sensor.</code><code class="descname">CassandraTableSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/cassandra_table_sensor.html#CassandraTableSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.cassandra_table_sensor.CassandraTableSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a table in a Cassandra cluster.</p> |
| <p>For example, if you want to wait for a table called ‘t’ to be created |
| in a keyspace ‘k’, instantiate it as follows:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">cassandra_sensor</span> <span class="o">=</span> <span class="n">CassandraTableSensor</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s2">"k.t"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">cassandra_conn_id</span><span class="o">=</span><span class="s2">"cassandra_default"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">task_id</span><span class="o">=</span><span class="s2">"cassandra_sensor"</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.cassandra_table_sensor.CassandraTableSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/cassandra_table_sensor.html#CassandraTableSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.cassandra_table_sensor.CassandraTableSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.datadog_sensor.DatadogSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.datadog_sensor.</code><code class="descname">DatadogSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/datadog_sensor.html#DatadogSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.datadog_sensor.DatadogSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>A sensor to listen, with a filter, to datadog event streams and determine |
| if some event was emitted.</p> |
| <p>Depends on the datadog API, which has to be deployed on the same server where |
| Airflow runs.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>datadog_conn_id</strong> (<em>string</em>) – The connection to datadog, containing metadata for api keys.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.datadog_sensor.DatadogSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/datadog_sensor.html#DatadogSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.datadog_sensor.DatadogSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.emr_base_sensor.</code><code class="descname">EmrBaseSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/emr_base_sensor.html#EmrBaseSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Contains general sensor behavior for EMR. |
| Subclasses should implement get_emr_response() and state_from_response() methods. |
| Subclasses should also implement NON_TERMINAL_STATES and FAILED_STATE constants.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/emr_base_sensor.html#EmrBaseSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.emr_job_flow_sensor.EmrJobFlowSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.emr_job_flow_sensor.</code><code class="descname">EmrJobFlowSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/emr_job_flow_sensor.html#EmrJobFlowSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.emr_job_flow_sensor.EmrJobFlowSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor" title="airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor</span></code></a></p> |
| <p>Asks for the state of the JobFlow until it reaches a terminal state. |
| If it fails the sensor errors, failing the task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>job_flow_id</strong> (<em>string</em>) – job_flow_id to check the state of</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.emr_step_sensor.EmrStepSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.emr_step_sensor.</code><code class="descname">EmrStepSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/emr_step_sensor.html#EmrStepSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.emr_step_sensor.EmrStepSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor" title="airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.emr_base_sensor.EmrBaseSensor</span></code></a></p> |
| <p>Asks for the state of the step until it reaches a terminal state. |
| If it fails the sensor errors, failing the task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> (<em>string</em>) – job_flow_id which contains the step to check the state of</li>
| <li><strong>step_id</strong> (<em>string</em>) – step to check the state of</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.file_sensor.FileSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.file_sensor.</code><code class="descname">FileSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/file_sensor.html#FileSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.file_sensor.FileSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a file or folder to land in a filesystem.</p> |
| <p>If the path given is a directory then this sensor will only return true if |
| any files exist inside it (either directly, or within a subdirectory)</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>fs_conn_id</strong> (<em>string</em>) – reference to the File (path) |
| connection id</li> |
| <li><strong>filepath</strong> – File or folder name (relative to |
| the base path set within the connection)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.file_sensor.FileSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/file_sensor.html#FileSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.file_sensor.FileSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.ftp_sensor.FTPSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.ftp_sensor.</code><code class="descname">FTPSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/ftp_sensor.html#FTPSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.ftp_sensor.FTPSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a file or directory to be present on FTP.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.ftp_sensor.FTPSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/ftp_sensor.html#FTPSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.ftp_sensor.FTPSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.sensors.ftp_sensor.FTPSensor.template_fields"> |
| <code class="descname">template_fields</code><em class="property"> = ('path',)</em><a class="headerlink" href="#airflow.contrib.sensors.ftp_sensor.FTPSensor.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fields on this sensor that are rendered as Jinja templates before execution</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.ftp_sensor.FTPSSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.ftp_sensor.</code><code class="descname">FTPSSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/ftp_sensor.html#FTPSSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.ftp_sensor.FTPSSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.ftp_sensor.FTPSensor" title="airflow.contrib.sensors.ftp_sensor.FTPSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.ftp_sensor.FTPSensor</span></code></a></p> |
| <p>Waits for a file or directory to be present on FTP over SSL.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.gcs_sensor.</code><code class="descname">GoogleCloudStorageObjectSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/gcs_sensor.html#GoogleCloudStorageObjectSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a file in Google Cloud Storage. |
| Create a new GoogleCloudStorageObjectSensor.</p> |
| <blockquote> |
| <div><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">param bucket:</th><td class="field-body">The Google cloud storage bucket where the object is.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type bucket:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">param object:</th><td class="field-body">The name of the object to check in the Google cloud |
| storage bucket.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type object:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param google_cloud_storage_conn_id:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The connection ID to use when |
| connecting to Google cloud storage.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type google_cloud_storage_conn_id:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param delegate_to:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type delegate_to:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/gcs_sensor.html#GoogleCloudStorageObjectSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectUpdatedSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.gcs_sensor.</code><code class="descname">GoogleCloudStorageObjectUpdatedSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/gcs_sensor.html#GoogleCloudStorageObjectUpdatedSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectUpdatedSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks if an object is updated in Google Cloud Storage. |
| Create a new GoogleCloudStorageObjectUpdatedSensor.</p> |
| <blockquote> |
| <div><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">param bucket:</th><td class="field-body">The Google cloud storage bucket where the object is.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type bucket:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">param object:</th><td class="field-body">The name of the object to download in the Google cloud |
| storage bucket.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type object:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">param ts_func:</th><td class="field-body">Callback for defining the update condition. The default callback |
| returns execution_date + schedule_interval. The callback takes the context |
| as parameter.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type ts_func:</th><td class="field-body">function</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param google_cloud_storage_conn_id:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The connection ID to use when |
| connecting to Google cloud storage.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type google_cloud_storage_conn_id:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param delegate_to:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type delegate_to:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectUpdatedSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/gcs_sensor.html#GoogleCloudStorageObjectUpdatedSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.gcs_sensor.GoogleCloudStorageObjectUpdatedSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.gcs_sensor.GoogleCloudStoragePrefixSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.gcs_sensor.</code><code class="descname">GoogleCloudStoragePrefixSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/gcs_sensor.html#GoogleCloudStoragePrefixSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.gcs_sensor.GoogleCloudStoragePrefixSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of files at a prefix in a Google Cloud Storage bucket.
| Create a new GoogleCloudStoragePrefixSensor.</p>
| <blockquote> |
| <div><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">param bucket:</th><td class="field-body">The Google cloud storage bucket where the object is.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type bucket:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">param prefix:</th><td class="field-body">The name of the prefix to check in the Google cloud |
| storage bucket.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">type prefix:</th><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param google_cloud_storage_conn_id:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The connection ID to use when |
| connecting to Google cloud storage.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type google_cloud_storage_conn_id:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">param delegate_to:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">type delegate_to:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">string</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.gcs_sensor.GoogleCloudStoragePrefixSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/gcs_sensor.html#GoogleCloudStoragePrefixSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.gcs_sensor.GoogleCloudStoragePrefixSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.hdfs_sensor.HdfsSensorFolder"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.hdfs_sensor.</code><code class="descname">HdfsSensorFolder</code><span class="sig-paren">(</span><em>be_empty=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/hdfs_sensor.html#HdfsSensorFolder"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.hdfs_sensor.HdfsSensorFolder" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.hdfs_sensor.HdfsSensor" title="airflow.sensors.hdfs_sensor.HdfsSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.hdfs_sensor.HdfsSensor</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.hdfs_sensor.HdfsSensorFolder.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/hdfs_sensor.html#HdfsSensorFolder.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.hdfs_sensor.HdfsSensorFolder.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>poke for a non empty directory</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Bool depending on the search criteria</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.hdfs_sensor.HdfsSensorRegex"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.hdfs_sensor.</code><code class="descname">HdfsSensorRegex</code><span class="sig-paren">(</span><em>regex</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/hdfs_sensor.html#HdfsSensorRegex"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.hdfs_sensor.HdfsSensorRegex" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.hdfs_sensor.HdfsSensor" title="airflow.sensors.hdfs_sensor.HdfsSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.hdfs_sensor.HdfsSensor</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.hdfs_sensor.HdfsSensorRegex.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/hdfs_sensor.html#HdfsSensorRegex.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.hdfs_sensor.HdfsSensorRegex.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>poke matching files in a directory with self.regex</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Bool depending on the search criteria</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.jira_sensor.JiraSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.jira_sensor.</code><code class="descname">JiraSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/jira_sensor.html#JiraSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.jira_sensor.JiraSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Monitors a jira ticket for any change.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>jira_conn_id</strong> (<em>str</em>) – reference to a pre-defined Jira Connection</li> |
| <li><strong>method_name</strong> (<em>str</em>) – method name from jira-python-sdk to be executed</li> |
| <li><strong>method_params</strong> (<em>dict</em>) – parameters for the method method_name</li> |
| <li><strong>result_processor</strong> (<em>function</em>) – function that returns a boolean and acts as a sensor response</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.jira_sensor.JiraSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/jira_sensor.html#JiraSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.jira_sensor.JiraSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.pubsub_sensor.PubSubPullSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.pubsub_sensor.</code><code class="descname">PubSubPullSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/pubsub_sensor.html#PubSubPullSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.pubsub_sensor.PubSubPullSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Pulls messages from a PubSub subscription and passes them through XCom.</p> |
| <p>This sensor operator will pull up to <code class="docutils literal notranslate"><span class="pre">max_messages</span></code> messages from the |
| specified PubSub subscription. When the subscription returns messages, |
| the poke method’s criteria will be fulfilled and the messages will be |
| returned from the operator and passed through XCom for downstream tasks.</p> |
| <p>If <code class="docutils literal notranslate"><span class="pre">ack_messages</span></code> is set to True, messages will be immediately |
| acknowledged before being returned, otherwise, downstream tasks will be |
| responsible for acknowledging them.</p> |
| <p><code class="docutils literal notranslate"><span class="pre">project</span></code> and <code class="docutils literal notranslate"><span class="pre">subscription</span></code> are templated so you can use |
| variables in them.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.pubsub_sensor.PubSubPullSensor.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/pubsub_sensor.html#PubSubPullSensor.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.pubsub_sensor.PubSubPullSensor.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Overridden to allow messages to be passed</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.pubsub_sensor.PubSubPullSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/pubsub_sensor.html#PubSubPullSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.pubsub_sensor.PubSubPullSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.python_sensor.PythonSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.python_sensor.</code><code class="descname">PythonSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/python_sensor.html#PythonSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.python_sensor.PythonSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a Python callable to return True.</p> |
| <dl class="docutils"> |
| <dt>User could put input argument in templates_dict</dt> |
| <dd>e.g. templates_dict = {‘start_ds’: 1970}</dd> |
| </dl> |
| <p>and access the argument by calling <cite>kwargs[‘templates_dict’][‘start_ds’]</cite> |
| in the callable</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>python_callable</strong> (<em>python callable</em>) – A reference to an object that is callable</li> |
| <li><strong>op_kwargs</strong> (<em>dict</em>) – a dictionary of keyword arguments that will get unpacked |
| in your function</li> |
| <li><strong>op_args</strong> (<em>list</em>) – a list of positional arguments that will get unpacked when |
| calling your callable</li> |
| <li><strong>provide_context</strong> (<em>bool</em>) – if set to true, Airflow will pass a set of |
| keyword arguments that can be used in your function. This set of |
| kwargs correspond exactly to what you can use in your jinja |
| templates. For this to work, you need to define <cite>**kwargs</cite> in your |
| function header.</li> |
| <li><strong>templates_dict</strong> (<em>dict of str</em>) – a dictionary where the values are templates that |
| will get templated by the Airflow engine sometime between |
| <code class="docutils literal notranslate"><span class="pre">__init__</span></code> and <code class="docutils literal notranslate"><span class="pre">execute</span></code> takes place and are made available |
| in your callable’s context after the template has been applied.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.python_sensor.PythonSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/python_sensor.html#PythonSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.python_sensor.PythonSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.qubole_sensor.QuboleSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.qubole_sensor.</code><code class="descname">QuboleSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/qubole_sensor.html#QuboleSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.qubole_sensor.QuboleSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Base class for all Qubole Sensors</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>qubole_conn_id</strong> (<em>string</em>) – The qubole connection to run the sensor against</li> |
| <li><strong>data</strong> (<em>a JSON object</em>) – a JSON object containing payload, whose presence needs to be checked</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">Both <code class="docutils literal notranslate"><span class="pre">data</span></code> and <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code> fields are template-supported. You can also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template driven use cases.</p> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.qubole_sensor.QuboleSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/qubole_sensor.html#QuboleSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.qubole_sensor.QuboleSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.redis_key_sensor.RedisKeySensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.redis_key_sensor.</code><code class="descname">RedisKeySensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/redis_key_sensor.html#RedisKeySensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.redis_key_sensor.RedisKeySensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a key in a Redis database</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.redis_key_sensor.RedisKeySensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/redis_key_sensor.html#RedisKeySensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.redis_key_sensor.RedisKeySensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.sagemaker_base_sensor.</code><code class="descname">SageMakerBaseSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sagemaker_base_sensor.html#SageMakerBaseSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Contains general sensor behavior for SageMaker. |
| Subclasses should implement get_sagemaker_response() |
| and state_from_response() methods. |
| Subclasses should also implement NON_TERMINAL_STATES and FAILED_STATE methods.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sagemaker_base_sensor.html#SageMakerBaseSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.sagemaker_endpoint_sensor.SageMakerEndpointSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.sagemaker_endpoint_sensor.</code><code class="descname">SageMakerEndpointSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sagemaker_endpoint_sensor.html#SageMakerEndpointSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sagemaker_endpoint_sensor.SageMakerEndpointSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor" title="airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor</span></code></a></p> |
| <p>Asks for the state of the endpoint until it reaches a terminal state. |
| If it fails, the sensor errors and the task fails.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>job_name</strong> (<em>str</em>) – job_name of the endpoint instance to check the state of</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.sagemaker_training_sensor.SageMakerTrainingSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.sagemaker_training_sensor.</code><code class="descname">SageMakerTrainingSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sagemaker_training_sensor.html#SageMakerTrainingSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sagemaker_training_sensor.SageMakerTrainingSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor" title="airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor</span></code></a></p> |
| <p>Asks for the state of the training job until it reaches a terminal state. |
| If it fails, the sensor errors, failing the task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the SageMaker training job to check the state of</li> |
| <li><strong>print_log</strong> (<em>bool</em>) – if the operator should print the cloudwatch log</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.sagemaker_transform_sensor.SageMakerTransformSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.sagemaker_transform_sensor.</code><code class="descname">SageMakerTransformSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sagemaker_transform_sensor.html#SageMakerTransformSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sagemaker_transform_sensor.SageMakerTransformSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor" title="airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor</span></code></a></p> |
| <p>Asks for the state of the transform job until it reaches a terminal state. |
| The sensor will error if the job errors, throwing an AirflowException |
| containing the failure reason.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>job_name</strong> (<em>string</em>) – job_name of the transform job instance to check the state of</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.sagemaker_tuning_sensor.SageMakerTuningSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.sagemaker_tuning_sensor.</code><code class="descname">SageMakerTuningSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sagemaker_tuning_sensor.html#SageMakerTuningSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sagemaker_tuning_sensor.SageMakerTuningSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor" title="airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.sagemaker_base_sensor.SageMakerBaseSensor</span></code></a></p> |
| <p>Asks for the state of the tuning job until it reaches a terminal state. |
| The sensor will error if the job errors, throwing an AirflowException |
| containing the failure reason.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>job_name</strong> (<em>str</em>) – job_name of the tuning instance to check the state of</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.sftp_sensor.SFTPSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.sftp_sensor.</code><code class="descname">SFTPSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sftp_sensor.html#SFTPSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sftp_sensor.SFTPSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.sensors.BaseSensorOperator</span></code></p> |
| <p>Waits for a file or directory to be present on SFTP. |
| :param path: Remote file or directory path |
| :type path: str |
| :param sftp_conn_id: The connection to run the sensor against |
| :type sftp_conn_id: str</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.sftp_sensor.SFTPSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/sftp_sensor.html#SFTPSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.sftp_sensor.SFTPSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbBlobSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a blob to arrive on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.check_for_blob()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.weekday_sensor.DayOfWeekSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.weekday_sensor.</code><code class="descname">DayOfWeekSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/weekday_sensor.html#DayOfWeekSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.weekday_sensor.DayOfWeekSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits until the first specified day of the week. For example, if the execution |
| day of the task is ‘2018-12-22’ (Saturday) and you pass ‘FRIDAY’, the task will wait |
| until next Friday.</p> |
| <p><strong>Example</strong> (with single day):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">weekend_check</span> <span class="o">=</span> <span class="n">DayOfWeekSensor</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'weekend_check'</span><span class="p">,</span> |
| <span class="n">week_day</span><span class="o">=</span><span class="s1">'Saturday'</span><span class="p">,</span> |
| <span class="n">use_task_execution_day</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><strong>Example</strong> (with multiple day using set):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">weekend_check</span> <span class="o">=</span> <span class="n">DayOfWeekSensor</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'weekend_check'</span><span class="p">,</span> |
| <span class="n">week_day</span><span class="o">=</span><span class="p">{</span><span class="s1">'Saturday'</span><span class="p">,</span> <span class="s1">'Sunday'</span><span class="p">},</span> |
| <span class="n">use_task_execution_day</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><strong>Example</strong> (with <code class="xref py py-class docutils literal notranslate"><span class="pre">WeekDay</span></code> enum):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># import WeekDay Enum</span> |
| <span class="kn">from</span> <span class="nn">airflow.contrib.utils.weekday</span> <span class="k">import</span> <span class="n">WeekDay</span> |
| |
| <span class="n">weekend_check</span> <span class="o">=</span> <span class="n">DayOfWeekSensor</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'weekend_check'</span><span class="p">,</span> |
| <span class="n">week_day</span><span class="o">=</span><span class="p">{</span><span class="n">WeekDay</span><span class="o">.</span><span class="n">SATURDAY</span><span class="p">,</span> <span class="n">WeekDay</span><span class="o">.</span><span class="n">SUNDAY</span><span class="p">},</span> |
| <span class="n">use_task_execution_day</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>week_day</strong> (<em>set</em><em> or </em><em>str</em><em> or </em><em>WeekDay</em>) – <p>Day of the week to check (full name). Optionally, a set |
| of days can also be provided using a set. |
| Example values:</p> |
| <blockquote> |
| <div><ul> |
| <li><code class="docutils literal notranslate"><span class="pre">"MONDAY"</span></code>,</li> |
| <li><code class="docutils literal notranslate"><span class="pre">{"Saturday",</span> <span class="pre">"Sunday"}</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">{WeekDay.TUESDAY}</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">{WeekDay.SATURDAY,</span> <span class="pre">WeekDay.SUNDAY}</span></code></li> |
| </ul> |
| </div></blockquote> |
| </li> |
| <li><strong>use_task_execution_day</strong> (<em>bool</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, uses task’s execution day to compare |
| with week_day. The execution date is useful for backfilling. |
| If <code class="docutils literal notranslate"><span class="pre">False</span></code>, uses system’s day of the week. Useful when you |
| don’t want to run anything on weekdays on the system.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.weekday_sensor.DayOfWeekSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/weekday_sensor.html#DayOfWeekSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.weekday_sensor.DayOfWeekSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="macros"> |
| <span id="id10"></span><h2>Macros<a class="headerlink" href="#macros" title="Permalink to this headline">¶</a></h2> |
| <p>Here’s a list of variables and macros that can be used in templates</p> |
| <div class="section" id="default-variables"> |
| <h3>Default Variables<a class="headerlink" href="#default-variables" title="Permalink to this headline">¶</a></h3> |
| <p>The Airflow engine passes a few variables by default that are accessible |
| in all templates</p> |
| <table border="1" class="docutils"> |
| <colgroup> |
| <col width="28%" /> |
| <col width="72%" /> |
| </colgroup> |
| <thead valign="bottom"> |
| <tr class="row-odd"><th class="head">Variable</th> |
| <th class="head">Description</th> |
| </tr> |
| </thead> |
| <tbody valign="top"> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code></td> |
| <td>the execution date as <code class="docutils literal notranslate"><span class="pre">YYYY-MM-DD</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds_nodash</span> <span class="pre">}}</span></code></td> |
| <td>the execution date as <code class="docutils literal notranslate"><span class="pre">YYYYMMDD</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">prev_ds</span> <span class="pre">}}</span></code></td> |
| <td>the previous execution date as <code class="docutils literal notranslate"><span class="pre">YYYY-MM-DD</span></code> |
if <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code> is <code class="docutils literal notranslate"><span class="pre">2018-01-08</span></code> and <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code> is <code class="docutils literal notranslate"><span class="pre">@weekly</span></code>,
<code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">prev_ds</span> <span class="pre">}}</span></code> will be <code class="docutils literal notranslate"><span class="pre">2018-01-01</span></code></td>
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">prev_ds_nodash</span> <span class="pre">}}</span></code></td> |
<td>the previous execution date as <code class="docutils literal notranslate"><span class="pre">YYYYMMDD</span></code> if exists, else <code class="docutils literal notranslate"><span class="pre">None</span></code></td>
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">next_ds</span> <span class="pre">}}</span></code></td> |
| <td>the next execution date as <code class="docutils literal notranslate"><span class="pre">YYYY-MM-DD</span></code> |
| if <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code> is <code class="docutils literal notranslate"><span class="pre">2018-01-01</span></code> and <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code> is <code class="docutils literal notranslate"><span class="pre">@weekly</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">next_ds</span> <span class="pre">}}</span></code> will be <code class="docutils literal notranslate"><span class="pre">2018-01-08</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">next_ds_nodash</span> <span class="pre">}}</span></code></td> |
<td>the next execution date as <code class="docutils literal notranslate"><span class="pre">YYYYMMDD</span></code> if exists, else <code class="docutils literal notranslate"><span class="pre">None</span></code></td>
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">yesterday_ds</span> <span class="pre">}}</span></code></td> |
| <td>the day before the execution date as <code class="docutils literal notranslate"><span class="pre">YYYY-MM-DD</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">yesterday_ds_nodash</span> <span class="pre">}}</span></code></td> |
| <td>the day before the execution date as <code class="docutils literal notranslate"><span class="pre">YYYYMMDD</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">tomorrow_ds</span> <span class="pre">}}</span></code></td> |
| <td>the day after the execution date as <code class="docutils literal notranslate"><span class="pre">YYYY-MM-DD</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">tomorrow_ds_nodash</span> <span class="pre">}}</span></code></td> |
| <td>the day after the execution date as <code class="docutils literal notranslate"><span class="pre">YYYYMMDD</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ts</span> <span class="pre">}}</span></code></td> |
| <td>same as <code class="docutils literal notranslate"><span class="pre">execution_date.isoformat()</span></code>. Example: <code class="docutils literal notranslate"><span class="pre">2018-01-01T00:00:00+00:00</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ts_nodash</span> <span class="pre">}}</span></code></td> |
| <td>same as <code class="docutils literal notranslate"><span class="pre">ts</span></code> without <code class="docutils literal notranslate"><span class="pre">-</span></code>, <code class="docutils literal notranslate"><span class="pre">:</span></code> and TimeZone info. Example: <code class="docutils literal notranslate"><span class="pre">20180101T000000</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ts_nodash_with_tz</span> <span class="pre">}}</span></code></td> |
| <td>same as <code class="docutils literal notranslate"><span class="pre">ts</span></code> without <code class="docutils literal notranslate"><span class="pre">-</span></code> and <code class="docutils literal notranslate"><span class="pre">:</span></code>. Example: <code class="docutils literal notranslate"><span class="pre">20180101T000000+0000</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">execution_date</span> <span class="pre">}}</span></code></td> |
| <td>the execution_date, (datetime.datetime)</td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">prev_execution_date</span> <span class="pre">}}</span></code></td> |
| <td>the previous execution date (if available) (datetime.datetime)</td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">next_execution_date</span> <span class="pre">}}</span></code></td> |
| <td>the next execution date (datetime.datetime)</td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">dag</span> <span class="pre">}}</span></code></td> |
| <td>the DAG object</td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">task</span> <span class="pre">}}</span></code></td> |
| <td>the Task object</td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">macros</span> <span class="pre">}}</span></code></td> |
| <td>a reference to the macros package, described below</td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">task_instance</span> <span class="pre">}}</span></code></td> |
| <td>the task_instance object</td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">end_date</span> <span class="pre">}}</span></code></td> |
| <td>same as <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">latest_date</span> <span class="pre">}}</span></code></td> |
| <td>same as <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ti</span> <span class="pre">}}</span></code></td> |
| <td>same as <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">task_instance</span> <span class="pre">}}</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">params</span> <span class="pre">}}</span></code></td> |
<td>a reference to the user-defined params dictionary which can be overridden by
the dictionary passed through <code class="docutils literal notranslate"><span class="pre">trigger_dag</span> <span class="pre">-c</span></code> if you enabled
<code class="docutils literal notranslate"><span class="pre">dag_run_conf_overrides_params</span></code> in <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code></td>
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">var.value.my_var</span> <span class="pre">}}</span></code></td> |
| <td>global defined variables represented as a dictionary</td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">var.json.my_var.path</span> <span class="pre">}}</span></code></td> |
| <td>global defined variables represented as a dictionary |
| with deserialized JSON object, append the path to the |
| key within the JSON object</td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">task_instance_key_str</span> <span class="pre">}}</span></code></td> |
| <td>a unique, human-readable key to the task instance |
| formatted <code class="docutils literal notranslate"><span class="pre">{dag_id}_{task_id}_{ds}</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">conf</span> <span class="pre">}}</span></code></td> |
| <td>the full configuration object located at |
| <code class="docutils literal notranslate"><span class="pre">airflow.configuration.conf</span></code> which |
| represents the content of your |
| <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">run_id</span> <span class="pre">}}</span></code></td> |
| <td>the <code class="docutils literal notranslate"><span class="pre">run_id</span></code> of the current DAG run</td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">dag_run</span> <span class="pre">}}</span></code></td> |
| <td>a reference to the DagRun object</td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">test_mode</span> <span class="pre">}}</span></code></td> |
| <td>whether the task instance was called using |
| the CLI’s test subcommand</td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Note that you can access the object’s attributes and methods with simple |
| dot notation. Here are some examples of what is possible: |
| <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">task.owner</span> <span class="pre">}}</span></code>, <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">task.task_id</span> <span class="pre">}}</span></code>, <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ti.hostname</span> <span class="pre">}}</span></code>, … |
| Refer to the models documentation for more information on the objects’ |
| attributes and methods.</p> |
| <p>The <code class="docutils literal notranslate"><span class="pre">var</span></code> template variable allows you to access variables defined in Airflow’s |
| UI. You can access them as either plain-text or JSON. If you use JSON, you are |
| also able to walk nested structures, such as dictionaries like: |
| <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">var.json.my_dict_var.key1</span> <span class="pre">}}</span></code></p> |
| </div> |
| <div class="section" id="id15"> |
| <h3>Macros<a class="headerlink" href="#id15" title="Permalink to this headline">¶</a></h3> |
| <p>Macros are a way to expose objects to your templates and live under the |
| <code class="docutils literal notranslate"><span class="pre">macros</span></code> namespace in your templates.</p> |
| <p>A few commonly used libraries and methods are made available.</p> |
| <table border="1" class="docutils"> |
| <colgroup> |
| <col width="44%" /> |
| <col width="56%" /> |
| </colgroup> |
| <thead valign="bottom"> |
| <tr class="row-odd"><th class="head">Variable</th> |
| <th class="head">Description</th> |
| </tr> |
| </thead> |
| <tbody valign="top"> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">macros.datetime</span></code></td> |
| <td>The standard lib’s <code class="docutils literal notranslate"><span class="pre">datetime.datetime</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">macros.timedelta</span></code></td> |
| <td>The standard lib’s <code class="docutils literal notranslate"><span class="pre">datetime.timedelta</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">macros.dateutil</span></code></td> |
| <td>A reference to the <code class="docutils literal notranslate"><span class="pre">dateutil</span></code> package</td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">macros.time</span></code></td> |
| <td>The standard lib’s <code class="docutils literal notranslate"><span class="pre">time</span></code></td> |
| </tr> |
| <tr class="row-even"><td><code class="docutils literal notranslate"><span class="pre">macros.uuid</span></code></td> |
| <td>The standard lib’s <code class="docutils literal notranslate"><span class="pre">uuid</span></code></td> |
| </tr> |
| <tr class="row-odd"><td><code class="docutils literal notranslate"><span class="pre">macros.random</span></code></td> |
| <td>The standard lib’s <code class="docutils literal notranslate"><span class="pre">random</span></code></td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Some airflow specific macros are also defined:</p> |
| <span class="target" id="module-airflow.macros"></span><dl class="function"> |
| <dt id="airflow.macros.ds_add"> |
| <code class="descclassname">airflow.macros.</code><code class="descname">ds_add</code><span class="sig-paren">(</span><em>ds</em>, <em>days</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/macros.html#ds_add"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.macros.ds_add" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Add or subtract days from a YYYY-MM-DD</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>ds</strong> (<em>str</em>) – anchor date in <code class="docutils literal notranslate"><span class="pre">YYYY-MM-DD</span></code> format to add to</li> |
| <li><strong>days</strong> (<em>int</em>) – number of days to add to the ds, you can use negative values</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ds_add</span><span class="p">(</span><span class="s1">'2015-01-01'</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span> |
| <span class="go">'2015-01-06'</span> |
| <span class="gp">>>> </span><span class="n">ds_add</span><span class="p">(</span><span class="s1">'2015-01-06'</span><span class="p">,</span> <span class="o">-</span><span class="mi">5</span><span class="p">)</span> |
| <span class="go">'2015-01-01'</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.macros.ds_format"> |
| <code class="descclassname">airflow.macros.</code><code class="descname">ds_format</code><span class="sig-paren">(</span><em>ds</em>, <em>input_format</em>, <em>output_format</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/macros.html#ds_format"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.macros.ds_format" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Takes an input string and outputs another string |
| as specified in the output format</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>ds</strong> (<em>str</em>) – input string which contains a date</li> |
| <li><strong>input_format</strong> (<em>str</em>) – input string format. E.g. %Y-%m-%d</li> |
| <li><strong>output_format</strong> (<em>str</em>) – output string format E.g. %Y-%m-%d</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ds_format</span><span class="p">(</span><span class="s1">'2015-01-01'</span><span class="p">,</span> <span class="s2">"%Y-%m-</span><span class="si">%d</span><span class="s2">"</span><span class="p">,</span> <span class="s2">"%m-</span><span class="si">%d</span><span class="s2">-%y"</span><span class="p">)</span> |
| <span class="go">'01-01-15'</span> |
| <span class="gp">>>> </span><span class="n">ds_format</span><span class="p">(</span><span class="s1">'1/5/2015'</span><span class="p">,</span> <span class="s2">"%m/</span><span class="si">%d</span><span class="s2">/%Y"</span><span class="p">,</span> <span class="s2">"%Y-%m-</span><span class="si">%d</span><span class="s2">"</span><span class="p">)</span> |
| <span class="go">'2015-01-05'</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.macros.random"> |
| <code class="descclassname">airflow.macros.</code><code class="descname">random</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → x in the interval [0, 1).<a class="headerlink" href="#airflow.macros.random" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.macros.hive.closest_ds_partition"> |
| <code class="descclassname">airflow.macros.hive.</code><code class="descname">closest_ds_partition</code><span class="sig-paren">(</span><em>table</em>, <em>ds</em>, <em>before=True</em>, <em>schema='default'</em>, <em>metastore_conn_id='metastore_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/macros/hive.html#closest_ds_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.macros.hive.closest_ds_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This function finds the date in a list closest to the target date. |
| An optional parameter can be given to get the closest before or after.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>table</strong> (<em>str</em>) – A hive table name</li> |
| <li><strong>ds</strong> (<em>datetime.date list</em>) – A datestamp <code class="docutils literal notranslate"><span class="pre">%Y-%m-%d</span></code> e.g. <code class="docutils literal notranslate"><span class="pre">yyyy-mm-dd</span></code></li> |
| <li><strong>before</strong> (<em>bool</em><em> or </em><em>None</em>) – closest before (True), after (False) or either side of ds</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The closest date</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str or None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">tbl</span> <span class="o">=</span> <span class="s1">'airflow.static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">closest_ds_partition</span><span class="p">(</span><span class="n">tbl</span><span class="p">,</span> <span class="s1">'2015-01-02'</span><span class="p">)</span> |
| <span class="go">'2015-01-01'</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.macros.hive.max_partition"> |
| <code class="descclassname">airflow.macros.hive.</code><code class="descname">max_partition</code><span class="sig-paren">(</span><em>table</em>, <em>schema='default'</em>, <em>field=None</em>, <em>filter_map=None</em>, <em>metastore_conn_id='metastore_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/macros/hive.html#max_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.macros.hive.max_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the max partition for a table.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>schema</strong> (<em>string</em>) – The hive schema the table lives in</li> |
| <li><strong>table</strong> (<em>string</em>) – The hive table you are interested in, supports the dot |
| notation as in “my_database.my_table”, if a dot is found, |
| the schema param is disregarded</li> |
| <li><strong>metastore_conn_id</strong> (<em>string</em>) – The hive connection you are interested in. |
| If your default is set you don’t need to use this parameter.</li> |
| <li><strong>filter_map</strong> (<em>map</em>) – partition_key:partition_value map used for partition filtering, |
| e.g. {‘key1’: ‘value1’, ‘key2’: ‘value2’}. |
| Only partitions matching all partition_key:partition_value |
| pairs will be considered as candidates of max partition.</li> |
| <li><strong>field</strong> (<em>str</em>) – the field to get the max value from. If there’s only |
| one partition field, this will be inferred</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">max_partition</span><span class="p">(</span><span class="s1">'airflow.static_babynames_partitioned'</span><span class="p">)</span> |
| <span class="go">'2015-01-01'</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="models"> |
| <span id="models-ref"></span><h2>Models<a class="headerlink" href="#models" title="Permalink to this headline">¶</a></h2> |
| <p>Models are built on top of the SQLAlchemy ORM Base class, and instances are |
| persisted in the database.</p> |
| <span class="target" id="module-airflow.models"></span><dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">BaseOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Abstract base class for all operators. Since operators create objects that |
| become nodes in the dag, BaseOperator contains many recursive methods for |
| dag crawling behavior. To derive this class, you are expected to override |
| the constructor as well as the ‘execute’ method.</p> |
| <p>Operators derived from this class should perform or trigger certain tasks |
| synchronously (wait for completion). Example of operators could be an |
| operator that runs a Pig job (PigOperator), a sensor operator that |
| waits for a partition to land in Hive (HiveSensorOperator), or one that |
| moves data from Hive to MySQL (Hive2MySqlOperator). Instances of these |
| operators (tasks) target specific operations, running specific scripts, |
| functions or data transfers.</p> |
| <p>This class is abstract and shouldn’t be instantiated. Instantiating a |
| class derived from this one results in the creation of a task object, |
| which ultimately becomes a node in DAG objects. Task dependencies should |
| be set by using the set_upstream and/or set_downstream methods.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>task_id</strong> (<em>string</em>) – a unique, meaningful id for the task</li> |
| <li><strong>owner</strong> (<em>string</em>) – the owner of the task, using the unix username is recommended</li> |
| <li><strong>retries</strong> (<em>int</em>) – the number of retries that should be performed before |
| failing the task</li> |
| <li><strong>retry_delay</strong> (<em>timedelta</em>) – delay between retries</li> |
| <li><strong>retry_exponential_backoff</strong> (<em>bool</em>) – allow progressive longer waits between |
| retries by using exponential backoff algorithm on retry delay (delay |
| will be converted into seconds)</li> |
| <li><strong>max_retry_delay</strong> (<em>timedelta</em>) – maximum delay interval between retries</li> |
| <li><strong>start_date</strong> (<em>datetime</em>) – The <code class="docutils literal notranslate"><span class="pre">start_date</span></code> for the task, determines |
| the <code class="docutils literal notranslate"><span class="pre">execution_date</span></code> for the first task instance. The best practice |
| is to have the start_date rounded |
| to your DAG’s <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code>. Daily jobs have their start_date |
| some day at 00:00:00, hourly jobs have their start_date at 00:00 |
| of a specific hour. Note that Airflow simply looks at the latest |
| <code class="docutils literal notranslate"><span class="pre">execution_date</span></code> and adds the <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code> to determine |
| the next <code class="docutils literal notranslate"><span class="pre">execution_date</span></code>. It is also very important |
| to note that different tasks’ dependencies |
| need to line up in time. If task A depends on task B and their |
| start_date are offset in a way that their execution_date don’t line |
| up, A’s dependencies will never be met. If you are looking to delay |
| a task, for example running a daily task at 2AM, look into the |
| <code class="docutils literal notranslate"><span class="pre">TimeSensor</span></code> and <code class="docutils literal notranslate"><span class="pre">TimeDeltaSensor</span></code>. We advise against using |
| dynamic <code class="docutils literal notranslate"><span class="pre">start_date</span></code> and recommend using fixed ones. Read the |
| FAQ entry about start_date for more information.</li> |
| <li><strong>end_date</strong> (<em>datetime</em>) – if specified, the scheduler won’t go beyond this date</li> |
| <li><strong>depends_on_past</strong> (<em>bool</em>) – when set to true, task instances will run |
| sequentially while relying on the previous task’s schedule to |
| succeed. The task instance for the start_date is allowed to run.</li> |
| <li><strong>wait_for_downstream</strong> (<em>bool</em>) – when set to true, an instance of task |
| X will wait for tasks immediately downstream of the previous instance |
| of task X to finish successfully before it runs. This is useful if the |
| different instances of a task X alter the same asset, and this asset |
| is used by tasks downstream of task X. Note that depends_on_past |
| is forced to True wherever wait_for_downstream is used.</li> |
| <li><strong>queue</strong> (<em>str</em>) – which queue to target when running this job. Not |
| all executors implement queue management, the CeleryExecutor |
| does support targeting specific queues.</li> |
| <li><strong>dag</strong> (<a class="reference internal" href="#airflow.models.DAG" title="airflow.models.DAG"><em>DAG</em></a>) – a reference to the dag the task is attached to (if any)</li> |
| <li><strong>priority_weight</strong> (<em>int</em>) – priority weight of this task against other task. |
| This allows the executor to trigger higher priority tasks before |
| others when things get backed up.</li> |
| <li><strong>weight_rule</strong> (<em>str</em>) – weighting method used for the effective total |
| priority weight of the task. Options are: |
| <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">downstream</span> <span class="pre">|</span> <span class="pre">upstream</span> <span class="pre">|</span> <span class="pre">absolute</span> <span class="pre">}</span></code> default is <code class="docutils literal notranslate"><span class="pre">downstream</span></code> |
| When set to <code class="docutils literal notranslate"><span class="pre">downstream</span></code> the effective weight of the task is the |
| aggregate sum of all downstream descendants. As a result, upstream |
| tasks will have higher weight and will be scheduled more aggressively |
| when using positive weight values. This is useful when you have |
| multiple dag run instances and desire to have all upstream tasks to |
| complete for all runs before each dag can continue processing |
| downstream tasks. When set to <code class="docutils literal notranslate"><span class="pre">upstream</span></code> the effective weight is the |
| aggregate sum of all upstream ancestors. This is the opposite where |
downstream tasks have higher weight and will be scheduled more
| aggressively when using positive weight values. This is useful when you |
| have multiple dag run instances and prefer to have each dag complete |
| before starting upstream tasks of other dags. When set to |
| <code class="docutils literal notranslate"><span class="pre">absolute</span></code>, the effective weight is the exact <code class="docutils literal notranslate"><span class="pre">priority_weight</span></code> |
| specified without additional weighting. You may want to do this when |
| you know exactly what priority weight each task should have. |
| Additionally, when set to <code class="docutils literal notranslate"><span class="pre">absolute</span></code>, there is a bonus effect of |
| significantly speeding up the task creation process for very large |
| DAGs. Options can be set as string or using the constants defined in |
| the static class <code class="docutils literal notranslate"><span class="pre">airflow.utils.WeightRule</span></code></li> |
| <li><strong>pool</strong> (<em>str</em>) – the slot pool this task should run in, slot pools are a |
| way to limit concurrency for certain tasks</li> |
| <li><strong>sla</strong> (<em>datetime.timedelta</em>) – time by which the job is expected to succeed. Note that |
| this represents the <code class="docutils literal notranslate"><span class="pre">timedelta</span></code> after the period is closed. For |
| example if you set an SLA of 1 hour, the scheduler would send an email |
| soon after 1:00AM on the <code class="docutils literal notranslate"><span class="pre">2016-01-02</span></code> if the <code class="docutils literal notranslate"><span class="pre">2016-01-01</span></code> instance |
| has not succeeded yet. |
| The scheduler pays special attention to jobs with an SLA and |
| sends alert |
| emails for SLA misses. SLA misses are also recorded in the database |
| for future reference. All tasks that share the same SLA time |
| get bundled in a single email, sent soon after that time. SLA |
| notifications are sent once and only once for each task instance.</li> |
| <li><strong>execution_timeout</strong> (<em>datetime.timedelta</em>) – max time allowed for the execution of |
| this task instance, if it goes beyond it will raise and fail.</li> |
| <li><strong>on_failure_callback</strong> (<em>callable</em>) – a function to be called when a task instance |
| of this task fails. a context dictionary is passed as a single |
| parameter to this function. Context contains references to related |
| objects to the task instance and is documented under the macros |
| section of the API.</li> |
| <li><strong>on_retry_callback</strong> (<em>callable</em>) – much like the <code class="docutils literal notranslate"><span class="pre">on_failure_callback</span></code> except |
| that it is executed when retries occur.</li> |
| <li><strong>on_success_callback</strong> (<em>callable</em>) – much like the <code class="docutils literal notranslate"><span class="pre">on_failure_callback</span></code> except |
| that it is executed when the task succeeds.</li> |
| <li><strong>trigger_rule</strong> (<em>str</em>) – defines the rule by which dependencies are applied |
| for the task to get triggered. Options are: |
| <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">all_success</span> <span class="pre">|</span> <span class="pre">all_failed</span> <span class="pre">|</span> <span class="pre">all_done</span> <span class="pre">|</span> <span class="pre">one_success</span> <span class="pre">|</span> |
| <span class="pre">one_failed</span> <span class="pre">|</span> <span class="pre">none_failed</span> <span class="pre">|</span> <span class="pre">dummy}</span></code> |
| default is <code class="docutils literal notranslate"><span class="pre">all_success</span></code>. Options can be set as string or |
| using the constants defined in the static class |
| <code class="docutils literal notranslate"><span class="pre">airflow.utils.TriggerRule</span></code></li> |
| <li><strong>resources</strong> (<em>dict</em>) – A map of resource parameter names (the argument names of the |
| Resources constructor) to their values.</li> |
| <li><strong>run_as_user</strong> (<em>str</em>) – unix username to impersonate while running the task</li> |
| <li><strong>task_concurrency</strong> (<em>int</em>) – When set, a task will be able to limit the concurrent |
| runs across execution_dates</li> |
| <li><strong>executor_config</strong> (<em>dict</em>) – <p>Additional task-level configuration parameters that are |
| interpreted by a specific executor. Parameters are namespaced by the name of |
| executor.</p> |
| <p><strong>Example</strong>: to run this task in a specific docker container through |
| the KubernetesExecutor</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MyOperator</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> |
| <span class="n">executor_config</span><span class="o">=</span><span class="p">{</span> |
| <span class="s2">"KubernetesExecutor"</span><span class="p">:</span> |
| <span class="p">{</span><span class="s2">"image"</span><span class="p">:</span> <span class="s2">"myCustomDockerImage"</span><span class="p">}</span> |
| <span class="p">}</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt> |
| <code class="descname">clear</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.clear"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Clears the state of task instances associated with the task, following |
| the parameters specified.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt> |
| <code class="descname">dag</code></dt> |
| <dd><p>Returns the Operator’s DAG if set, otherwise raises an error</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt> |
| <code class="descname">deps</code></dt> |
| <dd><p>Returns the list of dependencies for the operator. These differ from execution |
| context dependencies in that they are specific to tasks and can be |
| extended/overridden by subclasses.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt> |
| <code class="descname">downstream_list</code></dt> |
| <dd><p>@property: list of tasks directly downstream</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.execute"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_direct_relative_ids</code><span class="sig-paren">(</span><em>upstream=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_direct_relative_ids"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Get the direct relative ids to the current task, upstream or |
| downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_direct_relatives</code><span class="sig-paren">(</span><em>upstream=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_direct_relatives"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Get the direct relatives to the current task, upstream or |
| downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_flat_relative_ids</code><span class="sig-paren">(</span><em>upstream=False</em>, <em>found_descendants=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_flat_relative_ids"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Get a flat list of relatives’ ids, either upstream or downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_flat_relatives</code><span class="sig-paren">(</span><em>upstream=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_flat_relatives"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Get a flat list of relatives, either upstream or downstream.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_task_instances</code><span class="sig-paren">(</span><em>session</em>, <em>start_date=None</em>, <em>end_date=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.get_task_instances"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Get a set of task instances related to this task for a specific date |
| range.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">has_dag</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.has_dag"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Returns True if the Operator has been assigned to a DAG.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.on_kill"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Override this method to cleanup subprocesses when a task instance |
| gets killed. Any use of the threading, subprocess or multiprocessing |
| module within an operator needs to be cleaned up or it will leave |
| ghost processes behind.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">post_execute</code><span class="sig-paren">(</span><em>context</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.post_execute"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>This hook is triggered right after self.execute() is called. |
| It is passed the execution context and any results returned by the |
| operator.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">pre_execute</code><span class="sig-paren">(</span><em>context</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.pre_execute"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>This hook is triggered right before self.execute() is called.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">prepare_template</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.prepare_template"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Hook that is triggered after the templated fields get replaced |
| by their content. If you need your operator to alter the |
| content of the file before the template is rendered, |
| it should override this method to do so.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">render_template</code><span class="sig-paren">(</span><em>attr</em>, <em>content</em>, <em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.render_template"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Renders a template either from a file or directly in a field, and returns |
| the rendered result.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">render_template_from_field</code><span class="sig-paren">(</span><em>attr</em>, <em>content</em>, <em>context</em>, <em>jinja_env</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.render_template_from_field"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Renders a template from a field. If the field is a string, it will |
| simply render the string and return the result. If it is a collection or |
| nested set of collections, it will traverse the structure and render |
| all strings in it.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">run</code><span class="sig-paren">(</span><em>start_date=None</em>, <em>end_date=None</em>, <em>ignore_first_depends_on_past=False</em>, <em>ignore_ti_state=False</em>, <em>mark_success=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.run"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Run a set of task instances for a date range.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt> |
| <code class="descname">schedule_interval</code></dt> |
| <dd><p>The schedule interval of the DAG always wins over individual tasks so |
| that tasks within a DAG always line up. The task still needs a |
| schedule_interval as it may not be attached to a DAG.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">set_downstream</code><span class="sig-paren">(</span><em>task_or_task_list</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.set_downstream"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Set a task or a task list to be directly downstream from the current |
| task.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">set_upstream</code><span class="sig-paren">(</span><em>task_or_task_list</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.set_upstream"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Set a task or a task list to be directly upstream from the current |
| task.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt> |
| <code class="descname">upstream_list</code></dt> |
| <dd><p>@property: list of tasks directly upstream</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">xcom_pull</code><span class="sig-paren">(</span><em>context</em>, <em>task_ids=None</em>, <em>dag_id=None</em>, <em>key=u'return_value'</em>, <em>include_prior_dates=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.xcom_pull"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>See TaskInstance.xcom_pull()</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">xcom_push</code><span class="sig-paren">(</span><em>context</em>, <em>key</em>, <em>value</em>, <em>execution_date=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#BaseOperator.xcom_push"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>See TaskInstance.xcom_push()</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.Chart"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">Chart</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Chart"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Chart" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.Connection"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">Connection</code><span class="sig-paren">(</span><em>conn_id=None</em>, <em>conn_type=None</em>, <em>host=None</em>, <em>login=None</em>, <em>password=None</em>, <em>schema=None</em>, <em>port=None</em>, <em>extra=None</em>, <em>uri=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Connection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Placeholder to store information about different database instances |
| connection information. The idea here is that scripts use references to |
| database instances (conn_id) instead of hard coding hostname, logins and |
| passwords when using operators or hooks.</p> |
| <dl class="attribute"> |
| <dt id="airflow.models.Connection.extra_dejson"> |
| <code class="descname">extra_dejson</code><a class="headerlink" href="#airflow.models.Connection.extra_dejson" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the extra property by deserializing json.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.DAG"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">DAG</code><span class="sig-paren">(</span><em>dag_id</em>, <em>description=u''</em>, <em>schedule_interval=datetime.timedelta(1)</em>, <em>start_date=None</em>, <em>end_date=None</em>, <em>full_filepath=None</em>, <em>template_searchpath=None</em>, <em>user_defined_macros=None</em>, <em>user_defined_filters=None</em>, <em>default_args=None</em>, <em>concurrency=16</em>, <em>max_active_runs=16</em>, <em>dagrun_timeout=None</em>, <em>sla_miss_callback=None</em>, <em>default_view=u'tree'</em>, <em>orientation='LR'</em>, <em>catchup=True</em>, <em>on_success_callback=None</em>, <em>on_failure_callback=None</em>, <em>params=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.dag.base_dag.BaseDag</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>A dag (directed acyclic graph) is a collection of tasks with directional |
| dependencies. A dag also has a schedule, a start and an end date |
| (optional). For each schedule, (say daily or hourly), the DAG needs to run |
| each individual task as its dependencies are met. Certain tasks have |
| the property of depending on their own past, meaning that they can’t run |
| until their previous schedule (and upstream tasks) are completed.</p> |
| <p>DAGs essentially act as namespaces for tasks. A task_id can only be |
| added once to a DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dag_id</strong> (<em>string</em>) – The id of the DAG</li> |
| <li><strong>description</strong> (<em>string</em>) – The description for the DAG to e.g. be shown on the webserver</li> |
| <li><strong>schedule_interval</strong> (<em>datetime.timedelta</em><em> or |
| </em><em>dateutil.relativedelta.relativedelta</em><em> or </em><em>str that acts as a cron |
| expression</em>) – Defines how often that DAG runs, this |
| timedelta object gets added to your latest task instance’s |
| execution_date to figure out the next schedule</li> |
| <li><strong>start_date</strong> (<em>datetime.datetime</em>) – The timestamp from which the scheduler will |
| attempt to backfill</li> |
| <li><strong>end_date</strong> (<em>datetime.datetime</em>) – A date beyond which your DAG won’t run, leave to None |
| for open ended scheduling</li> |
| <li><strong>template_searchpath</strong> (<em>string</em><em> or </em><em>list of strings</em>) – This list of folders (non relative) |
| defines where jinja will look for your templates. Order matters. |
| Note that jinja/airflow includes the path of your DAG file by |
| default</li> |
| <li><strong>user_defined_macros</strong> (<em>dict</em>) – a dictionary of macros that will be exposed |
| in your jinja templates. For example, passing <code class="docutils literal notranslate"><span class="pre">dict(foo='bar')</span></code> |
| to this argument allows you to <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">foo</span> <span class="pre">}}</span></code> in all jinja |
| templates related to this DAG. Note that you can pass any |
| type of object here.</li> |
| <li><strong>user_defined_filters</strong> (<em>dict</em>) – a dictionary of filters that will be exposed |
| in your jinja templates. For example, passing |
| <code class="docutils literal notranslate"><span class="pre">dict(hello=lambda</span> <span class="pre">name:</span> <span class="pre">'Hello</span> <span class="pre">%s'</span> <span class="pre">%</span> <span class="pre">name)</span></code> to this argument allows |
| you to <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">'world'</span> <span class="pre">|</span> <span class="pre">hello</span> <span class="pre">}}</span></code> in all jinja templates related to |
| this DAG.</li> |
| <li><strong>default_args</strong> (<em>dict</em>) – A dictionary of default parameters to be used |
| as constructor keyword parameters when initialising operators. |
| Note that operators have the same hook, and precede those defined |
| here, meaning that if your dict contains <cite>‘depends_on_past’: True</cite> |
| here and <cite>‘depends_on_past’: False</cite> in the operator’s call |
| <cite>default_args</cite>, the actual value will be <cite>False</cite>.</li> |
| <li><strong>params</strong> (<em>dict</em>) – a dictionary of DAG level parameters that are made |
| accessible in templates, namespaced under <cite>params</cite>. These |
| params can be overridden at the task level.</li> |
| <li><strong>concurrency</strong> (<em>int</em>) – the number of task instances allowed to run |
| concurrently</li> |
| <li><strong>max_active_runs</strong> (<em>int</em>) – maximum number of active DAG runs, beyond this |
| number of DAG runs in a running state, the scheduler won’t create |
| new active DAG runs</li> |
| <li><strong>dagrun_timeout</strong> (<em>datetime.timedelta</em>) – specify how long a DagRun should be up before |
| timing out / failing, so that new DagRuns can be created</li> |
| <li><strong>sla_miss_callback</strong> (<em>types.FunctionType</em>) – specify a function to call when reporting SLA |
| timeouts.</li> |
| <li><strong>default_view</strong> (<em>string</em>) – Specify DAG default view (tree, graph, duration, |
| gantt, landing_times)</li> |
| <li><strong>orientation</strong> (<em>string</em>) – Specify DAG orientation in graph view (LR, TB, RL, BT)</li> |
| <li><strong>catchup</strong> (<em>bool</em>) – Perform scheduler catchup (or only run latest)? Defaults to True</li> |
| <li><strong>on_failure_callback</strong> (<em>callable</em>) – A function to be called when a DagRun of this dag fails. |
| A context dictionary is passed as a single parameter to this function.</li> |
| <li><strong>on_success_callback</strong> (<em>callable</em>) – Much like the <code class="docutils literal notranslate"><span class="pre">on_failure_callback</span></code> except |
| that it is executed when the dag succeeds.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.models.DAG.add_task"> |
| <code class="descname">add_task</code><span class="sig-paren">(</span><em>task</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.add_task"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.add_task" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Add a task to the DAG</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>task</strong> (<em>task</em>) – the task you want to add</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.add_tasks"> |
| <code class="descname">add_tasks</code><span class="sig-paren">(</span><em>tasks</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.add_tasks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.add_tasks" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Add a list of tasks to the DAG</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>tasks</strong> (<em>list of tasks</em>) – a list of tasks you want to add</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.clear"> |
| <code class="descname">clear</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.clear"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.clear" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears a set of task instances associated with the current dag for |
| a specified date range.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.cli"> |
| <code class="descname">cli</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.cli"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.cli" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Exposes a CLI specific to this DAG</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.DAG.concurrency_reached"> |
| <code class="descname">concurrency_reached</code><a class="headerlink" href="#airflow.models.DAG.concurrency_reached" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boolean indicating whether the concurrency limit for this DAG |
| has been reached</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.create_dagrun"> |
| <code class="descname">create_dagrun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.create_dagrun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.create_dagrun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a dag run from this dag including the tasks associated with this dag. |
| Returns the dag run.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>run_id</strong> (<em>string</em>) – defines the run id for this dag run</li> |
| <li><strong>execution_date</strong> (<em>datetime</em>) – the execution date of this dag run</li> |
| <li><strong>state</strong> (<em>State</em>) – the state of the dag run</li> |
| <li><strong>start_date</strong> (<em>datetime</em>) – the date this dag run should be evaluated</li> |
| <li><strong>external_trigger</strong> (<em>bool</em>) – whether this dag run is externally triggered</li> |
| <li><strong>session</strong> (<em>Session</em>) – database session</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DAG.deactivate_stale_dags"> |
| <em class="property">static </em><code class="descname">deactivate_stale_dags</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.deactivate_stale_dags"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.deactivate_stale_dags" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deactivate any DAGs that were last touched by the scheduler before |
| the expiration date. These DAGs were likely deleted.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>expiration_date</strong> (<em>datetime</em>) – set inactive DAGs that were touched before this |
| time</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DAG.deactivate_unknown_dags"> |
| <em class="property">static </em><code class="descname">deactivate_unknown_dags</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.deactivate_unknown_dags"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.deactivate_unknown_dags" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Given a list of known DAGs, deactivate any other DAGs that are |
| marked as active in the ORM</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>active_dag_ids</strong> (<em>list</em><em>[</em><em>unicode</em><em>]</em>) – list of DAG IDs that are active</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.DAG.filepath"> |
| <code class="descname">filepath</code><a class="headerlink" href="#airflow.models.DAG.filepath" title="Permalink to this definition">¶</a></dt> |
| <dd><p>File location of where the dag object is instantiated</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.DAG.folder"> |
| <code class="descname">folder</code><a class="headerlink" href="#airflow.models.DAG.folder" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Folder location of where the dag object is instantiated</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.following_schedule"> |
| <code class="descname">following_schedule</code><span class="sig-paren">(</span><em>dttm</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.following_schedule"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.following_schedule" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Calculates the following schedule for this dag in UTC.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>dttm</strong> – utc datetime</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">utc datetime</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.get_active_runs"> |
| <code class="descname">get_active_runs</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_active_runs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_active_runs" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of dag run execution dates currently running</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">List of execution dates</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.get_dagrun"> |
| <code class="descname">get_dagrun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_dagrun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_dagrun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the dag run for a given execution date if it exists, otherwise |
| none.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>execution_date</strong> – The execution date of the DagRun to find.</li> |
| <li><strong>session</strong> – </li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The DagRun if found, otherwise None.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.get_last_dagrun"> |
| <code class="descname">get_last_dagrun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_last_dagrun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_last_dagrun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the last dag run for this dag, None if there was none. |
| Last dag run can be any type of run eg. scheduled or backfilled. |
| Overridden DagRuns are ignored</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.get_num_active_runs"> |
| <code class="descname">get_num_active_runs</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_num_active_runs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_num_active_runs" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the number of active “running” dag runs</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>external_trigger</strong> (<em>bool</em>) – True for externally triggered active dag runs</li> |
| <li><strong>session</strong> – </li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">number greater than 0 for active dag runs</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DAG.get_num_task_instances"> |
| <em class="property">static </em><code class="descname">get_num_task_instances</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_num_task_instances"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_num_task_instances" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the number of task instances in the given DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>session</strong> – ORM session</li> |
| <li><strong>dag_id</strong> (<em>unicode</em>) – ID of the DAG to get the task concurrency of</li> |
| <li><strong>task_ids</strong> (<em>list</em><em>[</em><em>unicode</em><em>]</em>) – A list of valid task IDs for the given DAG</li> |
| <li><strong>states</strong> (<em>list</em><em>[</em><em>state</em><em>]</em>) – A list of states to filter by if supplied</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The number of running tasks</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">int</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.get_run_dates"> |
| <code class="descname">get_run_dates</code><span class="sig-paren">(</span><em>start_date</em>, <em>end_date=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_run_dates"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_run_dates" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of dates between the interval received as parameter using this |
| dag’s schedule interval. Returned dates can be used for execution dates.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>start_date</strong> (<em>datetime</em>) – the start date of the interval</li> |
| <li><strong>end_date</strong> (<em>datetime</em>) – the end date of the interval, defaults to timezone.utcnow()</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">a list of dates within the interval following the dag’s schedule</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.get_template_env"> |
| <code class="descname">get_template_env</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.get_template_env"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.get_template_env" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a jinja2 Environment while taking into account the DAGs |
| template_searchpath, user_defined_macros and user_defined_filters</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.handle_callback"> |
| <code class="descname">handle_callback</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.handle_callback"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.handle_callback" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Triggers the appropriate callback depending on the value of success, namely the |
| on_failure_callback or on_success_callback. This method gets the context of a |
| single TaskInstance part of this DagRun and passes that to the callable along |
| with a ‘reason’, primarily to differentiate DagRun failures. |
Note:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>The logs end up in $AIRFLOW_HOME/logs/scheduler/latest/PROJECT/DAG_FILE.py.log |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dagrun</strong> – DagRun object</li> |
| <li><strong>success</strong> – Flag to specify if failure or success callback should be called</li> |
| <li><strong>reason</strong> – Completion reason</li> |
| <li><strong>session</strong> – Database session</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.is_fixed_time_schedule"> |
| <code class="descname">is_fixed_time_schedule</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.is_fixed_time_schedule"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.is_fixed_time_schedule" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Figures out if the DAG schedule has a fixed time (e.g. 3 AM).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">True if the schedule has a fixed time, False if not.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.DAG.is_paused"> |
| <code class="descname">is_paused</code><a class="headerlink" href="#airflow.models.DAG.is_paused" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boolean indicating whether this DAG is paused</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.DAG.latest_execution_date"> |
| <code class="descname">latest_execution_date</code><a class="headerlink" href="#airflow.models.DAG.latest_execution_date" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the latest date for which at least one dag run exists</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.normalize_schedule"> |
| <code class="descname">normalize_schedule</code><span class="sig-paren">(</span><em>dttm</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.normalize_schedule"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.normalize_schedule" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns dttm + interval unless dttm is first interval then it returns dttm</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.previous_schedule"> |
| <code class="descname">previous_schedule</code><span class="sig-paren">(</span><em>dttm</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.previous_schedule"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.previous_schedule" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Calculates the previous schedule for this dag in UTC</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>dttm</strong> – utc datetime</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">utc datetime</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.run"> |
| <code class="descname">run</code><span class="sig-paren">(</span><em>start_date=None</em>, <em>end_date=None</em>, <em>mark_success=False</em>, <em>local=False</em>, <em>executor=None</em>, <em>donot_pickle=False</em>, <em>ignore_task_deps=False</em>, <em>ignore_first_depends_on_past=False</em>, <em>pool=None</em>, <em>delay_on_limit_secs=1.0</em>, <em>verbose=False</em>, <em>conf=None</em>, <em>rerun_failed_tasks=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.run" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Runs the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>start_date</strong> (<em>datetime</em>) – the start date of the range to run</li> |
| <li><strong>end_date</strong> (<em>datetime</em>) – the end date of the range to run</li> |
| <li><strong>mark_success</strong> (<em>bool</em>) – True to mark jobs as succeeded without running them</li> |
| <li><strong>local</strong> (<em>bool</em>) – True to run the tasks using the LocalExecutor</li> |
| <li><strong>executor</strong> (<em>BaseExecutor</em>) – The executor instance to run the tasks</li> |
| <li><strong>donot_pickle</strong> (<em>bool</em>) – True to avoid pickling DAG object and send to workers</li> |
| <li><strong>ignore_task_deps</strong> (<em>bool</em>) – True to skip upstream tasks</li> |
| <li><strong>ignore_first_depends_on_past</strong> (<em>bool</em>) – True to ignore depends_on_past |
| dependencies for the first set of tasks only</li> |
| <li><strong>pool</strong> (<em>string</em>) – Resource pool to use</li> |
| <li><strong>delay_on_limit_secs</strong> (<em>float</em>) – Time in seconds to wait before next attempt to run |
| dag run when max_active_runs limit has been reached</li> |
| <li><strong>verbose</strong> (<em>boolean</em>) – Make logging output more verbose</li> |
| <li><strong>conf</strong> (<em>dict</em>) – user defined dictionary passed from CLI</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.set_dependency"> |
| <code class="descname">set_dependency</code><span class="sig-paren">(</span><em>upstream_task_id</em>, <em>downstream_task_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.set_dependency"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.set_dependency" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Simple utility method to set dependency between two tasks that |
| already have been added to the DAG using add_task()</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.sub_dag"> |
| <code class="descname">sub_dag</code><span class="sig-paren">(</span><em>task_regex</em>, <em>include_downstream=False</em>, <em>include_upstream=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.sub_dag"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.sub_dag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a subset of the current dag as a deep copy of the current dag |
| based on a regex that should match one or many tasks, and includes |
| upstream and downstream neighbours based on the flag passed.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.DAG.subdags"> |
| <code class="descname">subdags</code><a class="headerlink" href="#airflow.models.DAG.subdags" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of the subdag objects associated to this DAG</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.sync_to_db"> |
| <code class="descname">sync_to_db</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.sync_to_db"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.sync_to_db" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Save attributes about this DAG to the DB. Note that this method |
| can be called for both DAGs and SubDAGs. A SubDag is actually a |
| SubDagOperator.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>dag</strong> (<a class="reference internal" href="#airflow.models.DAG" title="airflow.models.DAG"><em>DAG</em></a>) – the DAG object to save to the DB</li> |
| <li><strong>sync_time</strong> (<em>datetime</em>) – The time that the DAG should be marked as sync’ed</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.test_cycle"> |
| <code class="descname">test_cycle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.test_cycle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.test_cycle" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check to see if there are any cycles in the DAG. Returns False if no cycle found, |
| otherwise raises exception.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.topological_sort"> |
| <code class="descname">topological_sort</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.topological_sort"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.topological_sort" title="Permalink to this definition">¶</a></dt> |
<dd><p>Sorts tasks in topological order, such that a task comes after any of its
| upstream dependencies.</p> |
| <p>Heavily inspired by: |
| <a class="reference external" href="http://blog.jupo.org/2012/04/06/topological-sorting-acyclic-directed-graphs/">http://blog.jupo.org/2012/04/06/topological-sorting-acyclic-directed-graphs/</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">list of tasks in topological order</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DAG.tree_view"> |
| <code class="descname">tree_view</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DAG.tree_view"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DAG.tree_view" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Shows an ascii tree representation of the DAG</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.DagBag"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">DagBag</code><span class="sig-paren">(</span><em>dag_folder=None</em>, <em>executor=None</em>, <em>include_examples=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.dag.base_dag.BaseDagBag</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>A dagbag is a collection of dags, parsed out of a folder tree and has high |
| level configuration settings, like what database to use as a backend and |
| what executor to use to fire off tasks. This makes it easier to run |
| distinct environments for say production and development, tests, or for |
| different teams or security profiles. What would have been system level |
| settings are now dagbag level so that one system can run multiple, |
| independent settings sets.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dag_folder</strong> (<em>unicode</em>) – the folder to scan to find DAGs</li> |
| <li><strong>executor</strong> – the executor to use when executing task instances |
| in this DagBag</li> |
| <li><strong>include_examples</strong> (<em>bool</em>) – whether to include the examples that ship |
| with airflow or not</li> |
| <li><strong>has_logged</strong> – an instance boolean that gets flipped from False to True after a |
| file has been skipped. This is to prevent overloading the user with logging |
| messages about skipped files. Therefore only once per DagBag is a file logged |
| being skipped.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.bag_dag"> |
| <code class="descname">bag_dag</code><span class="sig-paren">(</span><em>dag</em>, <em>parent_dag</em>, <em>root_dag</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.bag_dag"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.bag_dag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Adds the DAG into the bag, recurses into sub dags. |
| Throws AirflowDagCycleException if a cycle is detected in this dag or its subdags</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.collect_dags"> |
| <code class="descname">collect_dags</code><span class="sig-paren">(</span><em>dag_folder=None</em>, <em>only_if_updated=True</em>, <em>include_examples=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.collect_dags"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.collect_dags" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Given a file path or a folder, this method looks for python modules, |
| imports them and adds them to the dagbag collection.</p> |
| <p>Note that if a <code class="docutils literal notranslate"><span class="pre">.airflowignore</span></code> file is found while processing |
| the directory, it will behave much like a <code class="docutils literal notranslate"><span class="pre">.gitignore</span></code>, |
| ignoring files that match any of the regex patterns specified |
| in the file.</p> |
| <p><strong>Note</strong>: The patterns in .airflowignore are treated as |
| un-anchored regexes, not shell-like glob patterns.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.dagbag_report"> |
| <code class="descname">dagbag_report</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.dagbag_report"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.dagbag_report" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Prints a report around DagBag loading stats</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.get_dag"> |
| <code class="descname">get_dag</code><span class="sig-paren">(</span><em>dag_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.get_dag"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.get_dag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the DAG out of the dictionary, and refreshes it if expired</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.kill_zombies"> |
| <code class="descname">kill_zombies</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.kill_zombies"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.kill_zombies" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fail given zombie tasks, which are tasks that haven’t |
| had a heartbeat for too long, in the current DagBag.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>zombies</strong> (<em>SimpleTaskInstance</em>) – zombie task instances to kill.</li> |
<li><strong>session</strong> (<em>Session</em>) – DB session.</li>
</ul>
</td>
</tr>
</tbody>
</table>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.process_file"> |
| <code class="descname">process_file</code><span class="sig-paren">(</span><em>filepath</em>, <em>only_if_updated=True</em>, <em>safe_mode=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.process_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.process_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Given a path to a python module or zip file, this method imports |
the module and looks for dag objects within it.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagBag.size"> |
| <code class="descname">size</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagBag.size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagBag.size" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">the amount of dags contained in this dagbag</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.DagModel"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">DagModel</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagModel"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagModel" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.DagPickle"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">DagPickle</code><span class="sig-paren">(</span><em>dag</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagPickle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagPickle" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <p>Dags can originate from different places (user repos, master repo, …) |
| and also get executed in different places (different executors). This |
| object represents a version of a DAG and becomes a source of truth for |
| a BackfillJob execution. A pickle is a native python serialized object, |
| and in this case gets stored in the database for the duration of the job.</p> |
| <p>The executors pick up the DagPickle id and read the dag definition from |
| the database.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.DagRun"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">DagRun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>DagRun describes an instance of a Dag. It can be created |
| by the scheduler (for regular runs) or by an external trigger</p> |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DagRun.find"> |
| <em class="property">static </em><code class="descname">find</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.find"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.find" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a set of dag runs for the given search criteria.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dag_id</strong> (<em>integer</em><em>, </em><em>list</em>) – the dag_id to find dag runs for</li> |
| <li><strong>run_id</strong> (<em>string</em>) – defines the run id for this dag run</li> |
| <li><strong>execution_date</strong> (<em>datetime</em>) – the execution date</li> |
| <li><strong>state</strong> (<em>State</em>) – the state of the dag run</li> |
| <li><strong>external_trigger</strong> (<em>bool</em>) – whether this dag run is externally triggered</li> |
| <li><strong>no_backfills</strong> – return no backfills (True), return all (False).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><em>no_backfills</em> (<em>bool</em>) defaults to False.</p> |
| <p><strong>session</strong> (<em>Session</em>) – database session</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.get_dag"> |
| <code class="descname">get_dag</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_dag"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_dag" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the Dag associated with this DagRun.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">DAG</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="classmethod"> |
| <dt id="airflow.models.DagRun.get_latest_runs"> |
| <em class="property">classmethod </em><code class="descname">get_latest_runs</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_latest_runs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_latest_runs" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the latest DagRun for each DAG.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.get_previous_dagrun"> |
| <code class="descname">get_previous_dagrun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_previous_dagrun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_previous_dagrun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The previous DagRun, if there is one</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.get_previous_scheduled_dagrun"> |
| <code class="descname">get_previous_scheduled_dagrun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_previous_scheduled_dagrun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_previous_scheduled_dagrun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The previous, SCHEDULED DagRun, if there is one</p> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DagRun.get_run"> |
| <em class="property">static </em><code class="descname">get_run</code><span class="sig-paren">(</span><em>session</em>, <em>dag_id</em>, <em>execution_date</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_run" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>dag_id</strong> (<em>unicode</em>) – DAG ID</li> |
| <li><strong>execution_date</strong> (<em>datetime</em>) – execution date</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">DagRun corresponding to the given dag_id and execution date</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>if one exists. None otherwise.</p> |
| <p>Return type: <em>DagRun</em></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.get_task_instance"> |
| <code class="descname">get_task_instance</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_task_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_task_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the task instance specified by task_id for this dag run</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>task_id</strong> – the task id</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.get_task_instances"> |
| <code class="descname">get_task_instances</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.get_task_instances"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.get_task_instances" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the task instances for this dag run</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.refresh_from_db"> |
| <code class="descname">refresh_from_db</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.refresh_from_db"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.refresh_from_db" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reloads the current dagrun from the database.</p> |
| <p><strong>session</strong> – database session</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.update_state"> |
| <code class="descname">update_state</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.update_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.update_state" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Determines the overall state of the DagRun based on the state |
| of its TaskInstances.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">State</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.DagRun.verify_integrity"> |
| <code class="descname">verify_integrity</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagRun.verify_integrity"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagRun.verify_integrity" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Verifies the DagRun by checking for removed tasks or tasks that are not in the |
| database yet. It will set state to removed or add the task if required.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.DagStat"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">DagStat</code><span class="sig-paren">(</span><em>dag_id</em>, <em>state</em>, <em>count=0</em>, <em>dirty=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagStat"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagStat" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DagStat.create"> |
| <em class="property">static </em><code class="descname">create</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagStat.create"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagStat.create" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates the missing states in the stats table for the dag specified</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>dag_id</strong> – dag id of the dag to create stats for</li> |
| <li><strong>session</strong> – database session</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"></p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DagStat.set_dirty"> |
| <em class="property">static </em><code class="descname">set_dirty</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagStat.set_dirty"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagStat.set_dirty" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>dag_id</strong> – the dag_id to mark dirty</li> |
| <li><strong>session</strong> – database session</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"></p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.DagStat.update"> |
| <em class="property">static </em><code class="descname">update</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#DagStat.update"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.DagStat.update" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates the stats for dirty/out-of-sync dags</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dag_ids</strong> (<em>list</em>) – dag_ids to be updated</li> |
| <li><strong>dirty_only</strong> (<em>bool</em>) – only updated for marked dirty, defaults to True</li> |
| <li><strong>session</strong> (<em>Session</em>) – db session to use</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.ImportError"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">ImportError</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#ImportError"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.ImportError" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="exception"> |
| <dt id="airflow.models.InvalidFernetToken"> |
| <em class="property">exception </em><code class="descclassname">airflow.models.</code><code class="descname">InvalidFernetToken</code><a class="reference internal" href="_modules/airflow/models.html#InvalidFernetToken"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.InvalidFernetToken" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">exceptions.Exception</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.KnownEvent"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">KnownEvent</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#KnownEvent"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.KnownEvent" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.KnownEventType"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">KnownEventType</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#KnownEventType"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.KnownEventType" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.KubeResourceVersion"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">KubeResourceVersion</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#KubeResourceVersion"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.KubeResourceVersion" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.KubeWorkerIdentifier"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">KubeWorkerIdentifier</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#KubeWorkerIdentifier"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.KubeWorkerIdentifier" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.Log"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">Log</code><span class="sig-paren">(</span><em>event</em>, <em>task_instance</em>, <em>owner=None</em>, <em>extra=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Log" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <p>Used to actively log events to the database</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.NullFernet"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">NullFernet</code><a class="reference internal" href="_modules/airflow/models.html#NullFernet"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.NullFernet" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">future.types.newobject.newobject</span></code></p> |
| <p>A “Null” encryptor class that doesn’t encrypt or decrypt but that presents |
| a similar interface to Fernet.</p> |
| <p>The purpose of this is to make the rest of the code not have to know the |
| difference, and to only display the message once, not 20 times when |
| <cite>airflow initdb</cite> is run.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.Pool"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">Pool</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Pool"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Pool" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <dl class="method"> |
| <dt id="airflow.models.Pool.open_slots"> |
| <code class="descname">open_slots</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Pool.open_slots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Pool.open_slots" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the number of slots open at the moment</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.Pool.queued_slots"> |
| <code class="descname">queued_slots</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Pool.queued_slots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Pool.queued_slots" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the number of slots queued at the moment</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.Pool.used_slots"> |
| <code class="descname">used_slots</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Pool.used_slots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Pool.used_slots" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the number of slots used at the moment</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.SlaMiss"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">SlaMiss</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#SlaMiss"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.SlaMiss" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <p>Model that stores a history of the SLA that have been missed. |
| It is used to keep track of SLA failures over time and to avoid double |
| triggering alert emails.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.TaskFail"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">TaskFail</code><span class="sig-paren">(</span><em>task</em>, <em>execution_date</em>, <em>start_date</em>, <em>end_date</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskFail"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskFail" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <p>TaskFail tracks the failed run durations of each task instance.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.TaskInstance"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">TaskInstance</code><span class="sig-paren">(</span><em>task</em>, <em>execution_date</em>, <em>state=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Task instances store the state of a task instance. This table is the |
| authority and single source of truth around what tasks have run and the |
| state they are in.</p> |
| <p>The SqlAlchemy model doesn’t have a SqlAlchemy foreign key to the task or |
| dag model deliberately to have more control over transactions.</p> |
| <p>Database transactions on this table should guard against double triggers |
| and any confusion around what task instances are or aren’t ready to run, |
| even while multiple schedulers may be firing task instances.</p> |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.are_dependencies_met"> |
| <code class="descname">are_dependencies_met</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.are_dependencies_met"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.are_dependencies_met" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns whether or not all the conditions are met for this task instance to be run |
| given the context for the dependencies (e.g. a task instance being force run from |
| the UI will ignore some dependencies).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dep_context</strong> (<em>DepContext</em>) – The execution context that determines the dependencies that |
| should be evaluated.</li> |
| <li><strong>session</strong> (<em>Session</em>) – database session</li> |
| <li><strong>verbose</strong> (<em>boolean</em>) – whether log details on failed dependencies on |
| info or debug log level</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.are_dependents_done"> |
| <code class="descname">are_dependents_done</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.are_dependents_done"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.are_dependents_done" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether the dependents of this task instance have all succeeded. |
| This is meant to be used by wait_for_downstream.</p> |
| <p>This is useful when you do not want to start processing the next |
| schedule of a task until the dependents are done. For instance, |
| if the task DROPs and recreates a table.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.clear_xcom_data"> |
| <code class="descname">clear_xcom_data</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.clear_xcom_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.clear_xcom_data" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears all XCom data from the database for the task instance</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.command"> |
| <code class="descname">command</code><span class="sig-paren">(</span><em>mark_success=False</em>, <em>ignore_all_deps=False</em>, <em>ignore_depends_on_past=False</em>, <em>ignore_task_deps=False</em>, <em>ignore_ti_state=False</em>, <em>local=False</em>, <em>pickle_id=None</em>, <em>raw=False</em>, <em>job_id=None</em>, <em>pool=None</em>, <em>cfg_path=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.command"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.command" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a command that can be executed anywhere where airflow is |
| installed. This command is part of the message sent to executors by |
| the orchestrator.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.command_as_list"> |
| <code class="descname">command_as_list</code><span class="sig-paren">(</span><em>mark_success=False</em>, <em>ignore_all_deps=False</em>, <em>ignore_task_deps=False</em>, <em>ignore_depends_on_past=False</em>, <em>ignore_ti_state=False</em>, <em>local=False</em>, <em>pickle_id=None</em>, <em>raw=False</em>, <em>job_id=None</em>, <em>pool=None</em>, <em>cfg_path=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.command_as_list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.command_as_list" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a command that can be executed anywhere where airflow is |
| installed. This command is part of the message sent to executors by |
| the orchestrator.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.current_state"> |
| <code class="descname">current_state</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.current_state"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.current_state" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the very latest state from the database; if a session is passed, |
| we use it and looking up the state becomes part of the session, otherwise |
| a new session is used.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.error"> |
| <code class="descname">error</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.error"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.error" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Forces the task instance’s state to FAILED in the database.</p> |
| </dd></dl> |
| |
| <dl class="staticmethod"> |
| <dt id="airflow.models.TaskInstance.generate_command"> |
| <em class="property">static </em><code class="descname">generate_command</code><span class="sig-paren">(</span><em>dag_id</em>, <em>task_id</em>, <em>execution_date</em>, <em>mark_success=False</em>, <em>ignore_all_deps=False</em>, <em>ignore_depends_on_past=False</em>, <em>ignore_task_deps=False</em>, <em>ignore_ti_state=False</em>, <em>local=False</em>, <em>pickle_id=None</em>, <em>file_path=None</em>, <em>raw=False</em>, <em>job_id=None</em>, <em>pool=None</em>, <em>cfg_path=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.generate_command"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.generate_command" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Generates the shell command required to execute this task instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>dag_id</strong> (<em>unicode</em>) – DAG ID</li> |
| <li><strong>task_id</strong> (<em>unicode</em>) – Task ID</li> |
| <li><strong>execution_date</strong> (<em>datetime</em>) – Execution date for the task</li> |
| <li><strong>mark_success</strong> (<em>bool</em>) – Whether to mark the task as successful</li> |
| <li><strong>ignore_all_deps</strong> (<em>boolean</em>) – Ignore all ignorable dependencies. |
| Overrides the other ignore_* parameters.</li> |
| <li><strong>ignore_depends_on_past</strong> (<em>boolean</em>) – Ignore depends_on_past parameter of DAGs |
| (e.g. for Backfills)</li> |
| <li><strong>ignore_task_deps</strong> (<em>boolean</em>) – Ignore task-specific dependencies such as depends_on_past |
| and trigger rule</li> |
| <li><strong>ignore_ti_state</strong> (<em>boolean</em>) – Ignore the task instance’s previous failure/success</li> |
| <li><strong>local</strong> (<em>bool</em>) – Whether to run the task locally</li> |
| <li><strong>pickle_id</strong> (<em>unicode</em>) – If the DAG was serialized to the DB, the ID |
| associated with the pickled DAG</li> |
| <li><strong>file_path</strong> – path to the file containing the DAG definition</li> |
| <li><strong>raw</strong> – raw mode (needs more details)</li> |
| <li><strong>job_id</strong> – job ID (needs more details)</li> |
| <li><strong>pool</strong> (<em>unicode</em>) – the Airflow pool that the task should run in</li> |
| <li><strong>cfg_path</strong> (<em>basestring</em>) – the Path to the configuration file</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">shell command that can be used to run the task instance</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.get_dagrun"> |
| <code class="descname">get_dagrun</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.get_dagrun"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.get_dagrun" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the DagRun for this TaskInstance</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">DagRun</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.init_on_load"> |
| <code class="descname">init_on_load</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.init_on_load"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.init_on_load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Initialize the attributes that aren’t stored in the DB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.init_run_context"> |
| <code class="descname">init_run_context</code><span class="sig-paren">(</span><em>raw=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.init_run_context"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.init_run_context" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the log context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.is_eligible_to_retry"> |
| <code class="descname">is_eligible_to_retry</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.is_eligible_to_retry"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.is_eligible_to_retry" title="Permalink to this definition">¶</a></dt> |
<dd><p>Is the task instance eligible for retry</p>
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.TaskInstance.is_premature"> |
| <code class="descname">is_premature</code><a class="headerlink" href="#airflow.models.TaskInstance.is_premature" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns whether a task is in UP_FOR_RETRY state and its retry interval |
| has elapsed.</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.TaskInstance.key"> |
| <code class="descname">key</code><a class="headerlink" href="#airflow.models.TaskInstance.key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a tuple that identifies the task instance uniquely</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.next_retry_datetime"> |
| <code class="descname">next_retry_datetime</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.next_retry_datetime"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.next_retry_datetime" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get datetime of the next retry if the task instance fails. For exponential |
| backoff, retry_delay is used as base and will be converted to seconds.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.pool_full"> |
| <code class="descname">pool_full</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.pool_full"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.pool_full" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boolean as to whether the slot pool has room for this |
| task to run</p> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.TaskInstance.previous_ti"> |
| <code class="descname">previous_ti</code><a class="headerlink" href="#airflow.models.TaskInstance.previous_ti" title="Permalink to this definition">¶</a></dt> |
| <dd><p>The task instance for the task that ran before this task instance</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.ready_for_retry"> |
| <code class="descname">ready_for_retry</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.ready_for_retry"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.ready_for_retry" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks on whether the task instance is in the right state and timeframe |
| to be retried.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.refresh_from_db"> |
| <code class="descname">refresh_from_db</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.refresh_from_db"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.refresh_from_db" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Refreshes the task instance from the database based on the primary key</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>lock_for_update</strong> – if True, indicates that the database should |
| lock the TaskInstance (issuing a FOR UPDATE clause) until the |
| session is committed.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.models.TaskInstance.try_number"> |
| <code class="descname">try_number</code><a class="headerlink" href="#airflow.models.TaskInstance.try_number" title="Permalink to this definition">¶</a></dt> |
<dd><p>Return the try number that this task instance will be when it is actually
run.</p>
<p>If the TI is currently running, this will match the column in the
database, in all other cases this will be incremented</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.xcom_pull"> |
| <code class="descname">xcom_pull</code><span class="sig-paren">(</span><em>task_ids=None</em>, <em>dag_id=None</em>, <em>key=u'return_value'</em>, <em>include_prior_dates=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.xcom_pull"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.xcom_pull" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Pull XComs that optionally meet certain criteria.</p> |
| <p>The default value for <cite>key</cite> limits the search to XComs |
| that were returned by other tasks (as opposed to those that were pushed |
| manually). To remove this filter, pass key=None (or any desired value).</p> |
| <p>If a single task_id string is provided, the result is the value of the |
| most recent matching XCom from that task_id. If multiple task_ids are |
| provided, a tuple of matching values is returned. None is returned |
| whenever no matches are found.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>string</em>) – A key for the XCom. If provided, only XComs with matching |
| keys will be returned. The default key is ‘return_value’, also |
| available as a constant XCOM_RETURN_KEY. This key is automatically |
| given to XComs returned by tasks (as opposed to being pushed |
| manually). To remove the filter, pass key=None.</li> |
| <li><strong>task_ids</strong> (<em>string</em><em> or </em><em>iterable of strings</em><em> (</em><em>representing task_ids</em><em>)</em>) – Only XComs from tasks with matching ids will be |
| pulled. Can pass None to remove the filter.</li> |
| <li><strong>dag_id</strong> (<em>string</em>) – If provided, only pulls XComs from this DAG. |
| If None (default), the DAG of the calling task is used.</li> |
| <li><strong>include_prior_dates</strong> (<em>bool</em>) – If False, only XComs from the current |
| execution_date are returned. If True, XComs from previous dates |
| are returned as well.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.models.TaskInstance.xcom_push"> |
| <code class="descname">xcom_push</code><span class="sig-paren">(</span><em>key</em>, <em>value</em>, <em>execution_date=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskInstance.xcom_push"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskInstance.xcom_push" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Make an XCom available for tasks to pull.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>string</em>) – A key for the XCom</li> |
| <li><strong>value</strong> (<em>any pickleable object</em>) – A value for the XCom. The value is pickled and stored |
| in the database.</li> |
| <li><strong>execution_date</strong> (<em>datetime</em>) – if provided, the XCom will not be visible until |
| this date. This can be used, for example, to send a message to a |
| task on a future date without it being immediately visible.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.TaskReschedule"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">TaskReschedule</code><span class="sig-paren">(</span><em>task</em>, <em>execution_date</em>, <em>try_number</em>, <em>start_date</em>, <em>end_date</em>, <em>reschedule_date</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskReschedule"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskReschedule" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| <p>TaskReschedule tracks rescheduled task instances.</p> |
| <dl class="staticmethod"> |
| <dt id="airflow.models.TaskReschedule.find_for_task_instance"> |
| <em class="property">static </em><code class="descname">find_for_task_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#TaskReschedule.find_for_task_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.TaskReschedule.find_for_task_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns all task reschedules for the task instance and try number, |
| in ascending order.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>task_instance</strong> (<a class="reference internal" href="#airflow.models.TaskInstance" title="airflow.models.TaskInstance"><em>TaskInstance</em></a>) – the task instance to find task reschedules for</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.User"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">User</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#User"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.User" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code></p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.Variable"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">Variable</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Variable"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Variable" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <dl class="classmethod"> |
| <dt id="airflow.models.Variable.setdefault"> |
| <em class="property">classmethod </em><code class="descname">setdefault</code><span class="sig-paren">(</span><em>key</em>, <em>default</em>, <em>deserialize_json=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#Variable.setdefault"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.Variable.setdefault" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Like a Python builtin dict object, setdefault returns the current value |
| for a key, and if it isn’t there, stores the default value and returns it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>String</em>) – Dict key for this Variable</li>
<li><strong>default</strong> (<em>Mixed</em>) – Default value to set and return if the variable
isn’t already in the DB</li>
<li><strong>deserialize_json</strong> – Store this as a JSON encoded value in the DB
and un-encode it when retrieving a value</li>
</ul>
</td>
</tr>
</tbody>
</table>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mixed</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.models.XCom"> |
| <em class="property">class </em><code class="descclassname">airflow.models.</code><code class="descname">XCom</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#XCom"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.XCom" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sqlalchemy.ext.declarative.api.Base</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Base class for XCom objects.</p> |
| <dl class="classmethod"> |
| <dt id="airflow.models.XCom.get_many"> |
| <em class="property">classmethod </em><code class="descname">get_many</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#XCom.get_many"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.XCom.get_many" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieve an XCom value, optionally meeting certain criteria |
| TODO: “pickling” has been deprecated and JSON is preferred.</p> |
| <blockquote> |
| <div>“pickling” will be removed in Airflow 2.0.</div></blockquote> |
| </dd></dl> |
| |
| <dl class="classmethod"> |
| <dt id="airflow.models.XCom.get_one"> |
| <em class="property">classmethod </em><code class="descname">get_one</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#XCom.get_one"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.XCom.get_one" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieve an XCom value, optionally meeting certain criteria. |
| TODO: “pickling” has been deprecated and JSON is preferred.</p> |
| <blockquote> |
| <div>“pickling” will be removed in Airflow 2.0.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">XCom value</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="classmethod"> |
| <dt id="airflow.models.XCom.set"> |
| <em class="property">classmethod </em><code class="descname">set</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#XCom.set"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.XCom.set" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Store an XCom value. |
| TODO: “pickling” has been deprecated and JSON is preferred.</p> |
| <blockquote> |
| <div>“pickling” will be removed in Airflow 2.0.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.models.clear_task_instances"> |
| <code class="descclassname">airflow.models.</code><code class="descname">clear_task_instances</code><span class="sig-paren">(</span><em>tis</em>, <em>session</em>, <em>activate_dag_runs=True</em>, <em>dag=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#clear_task_instances"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.clear_task_instances" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears a set of task instances, but makes sure the running ones |
| get killed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>tis</strong> – a list of task instances</li> |
| <li><strong>session</strong> – current session</li> |
| <li><strong>activate_dag_runs</strong> – flag to check for active dag run</li> |
| <li><strong>dag</strong> – DAG object</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.models.get_fernet"> |
| <code class="descclassname">airflow.models.</code><code class="descname">get_fernet</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/models.html#get_fernet"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.models.get_fernet" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deferred load of Fernet key.</p> |
| <p>This function could fail either because Cryptography is not installed |
| or because the Fernet key is invalid.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Fernet object</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body">AirflowException if there’s a problem trying to load Fernet</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="hooks"> |
| <h2>Hooks<a class="headerlink" href="#hooks" title="Permalink to this headline">¶</a></h2> |
| <p>Hooks are interfaces to external platforms and databases, implementing a common |
| interface when possible and acting as building blocks for operators.</p> |
| <dl class="class"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.dbapi_hook.</code><code class="descname">DbApiHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Abstract base class for sql hooks.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.bulk_dump"> |
| <code class="descname">bulk_dump</code><span class="sig-paren">(</span><em>table</em>, <em>tmp_file</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.bulk_dump"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.bulk_dump" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Dumps a database table into a tab-delimited file</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – The name of the source table</li> |
| <li><strong>tmp_file</strong> (<em>str</em>) – The path of the target file</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.bulk_load"> |
| <code class="descname">bulk_load</code><span class="sig-paren">(</span><em>table</em>, <em>tmp_file</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.bulk_load"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.bulk_load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a tab-delimited file into a database table</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – The name of the target table</li> |
| <li><strong>tmp_file</strong> (<em>str</em>) – The path of the file to load into the table</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.get_autocommit"> |
| <code class="descname">get_autocommit</code><span class="sig-paren">(</span><em>conn</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.get_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.get_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get autocommit setting for the provided connection. |
| Return True if conn.autocommit is set to True. |
| Return False if conn.autocommit is not set or set to False or conn |
| does not support autocommit. |
| :param conn: Connection to get autocommit setting from. |
| :type conn: connection object. |
| :return: connection autocommit setting. |
| :rtype bool.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.get_cursor"> |
| <code class="descname">get_cursor</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.get_cursor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.get_cursor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a cursor</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.get_first"> |
| <code class="descname">get_first</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.get_first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.get_first" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns the first resulting row.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns a pandas dataframe</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.get_records"> |
| <code class="descname">get_records</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.get_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.get_records" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns a set of records.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em>, <em>replace=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A generic way to insert a set of tuples into a table, |
| a new transaction is created every commit_every rows</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – Name of the target table</li> |
| <li><strong>rows</strong> (<em>iterable of tuples</em>) – The rows to insert into the table</li> |
| <li><strong>target_fields</strong> (<em>iterable of strings</em>) – The names of the columns to fill in the table</li> |
| <li><strong>commit_every</strong> (<em>int</em>) – The maximum number of rows to insert in one |
| transaction. Set to 0 to insert all rows in one transaction.</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Whether to replace instead of insert</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.run"> |
| <code class="descname">run</code><span class="sig-paren">(</span><em>sql</em>, <em>autocommit=False</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.run" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Runs a command or a list of commands. Pass a list of sql |
| statements to the sql parameter to get them to execute |
| sequentially</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</li> |
| <li><strong>autocommit</strong> (<em>bool</em>) – What to set the connection’s autocommit setting to |
| before executing the query.</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.dbapi_hook.DbApiHook.set_autocommit"> |
| <code class="descname">set_autocommit</code><span class="sig-paren">(</span><em>conn</em>, <em>autocommit</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/dbapi_hook.html#DbApiHook.set_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.dbapi_hook.DbApiHook.set_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the autocommit flag on the connection</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <span class="target" id="module-airflow.hooks.hive_hooks"></span><dl class="class"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.hive_hooks.</code><code class="descname">HiveCliHook</code><span class="sig-paren">(</span><em>hive_cli_conn_id=u'hive_cli_default'</em>, <em>run_as=None</em>, <em>mapred_queue=None</em>, <em>mapred_queue_priority=None</em>, <em>mapred_job_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveCliHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Simple wrapper around the hive CLI.</p> |
| <p>It also supports the <code class="docutils literal notranslate"><span class="pre">beeline</span></code> |
| a lighter CLI that runs JDBC and is replacing the heavier |
| traditional CLI. To enable <code class="docutils literal notranslate"><span class="pre">beeline</span></code>, set the use_beeline param in the |
| extra field of your connection as in <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">"use_beeline":</span> <span class="pre">true</span> <span class="pre">}</span></code></p> |
| <p>Note that you can also set default hive CLI parameters using the |
| <code class="docutils literal notranslate"><span class="pre">hive_cli_params</span></code> to be used in your connection as in |
| <code class="docutils literal notranslate"><span class="pre">{"hive_cli_params":</span> <span class="pre">"-hiveconf</span> <span class="pre">mapred.job.tracker=some.jobtracker:444"}</span></code> |
| Parameters passed here can be overridden by run_cli’s hive_conf param</p> |
| <p>The extra connection parameter <code class="docutils literal notranslate"><span class="pre">auth</span></code> gets passed as in the <code class="docutils literal notranslate"><span class="pre">jdbc</span></code> |
| connection string as is.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>mapred_queue</strong> (<em>string</em>) – queue used by the Hadoop Scheduler (Capacity or Fair)</li> |
| <li><strong>mapred_queue_priority</strong> (<em>string</em>) – priority within the job queue. |
| Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW</li> |
| <li><strong>mapred_job_name</strong> (<em>string</em>) – This name will appear in the jobtracker. |
| This can make monitoring easier.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.load_df"> |
| <code class="descname">load_df</code><span class="sig-paren">(</span><em>df</em>, <em>table</em>, <em>field_dict=None</em>, <em>delimiter=u'</em>, <em>'</em>, <em>encoding=u'utf8'</em>, <em>pandas_kwargs=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveCliHook.load_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.load_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a pandas DataFrame into hive.</p> |
| <p>Hive data types will be inferred if not passed but column names will |
| not be sanitized.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>df</strong> (<em>DataFrame</em>) – DataFrame to load into a Hive table</li> |
| <li><strong>table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database</li> |
| <li><strong>field_dict</strong> (<em>OrderedDict</em>) – mapping from column name to hive data type. |
| Note that it must be OrderedDict so as to keep columns’ order.</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>encoding</strong> (<em>str</em>) – string encoding to use when writing DataFrame to file</li> |
| <li><strong>pandas_kwargs</strong> (<em>dict</em>) – passed to DataFrame.to_csv</li> |
| <li><strong>kwargs</strong> – passed to self.load_file</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>filepath</em>, <em>table</em>, <em>delimiter=u'</em>, <em>'</em>, <em>field_dict=None</em>, <em>create=True</em>, <em>overwrite=True</em>, <em>partition=None</em>, <em>recreate=False</em>, <em>tblproperties=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveCliHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a local file into Hive</p> |
| <p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the tables gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>filepath</strong> (<em>str</em>) – local filepath of the file to load</li> |
| <li><strong>table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>field_dict</strong> (<em>OrderedDict</em>) – A dictionary of the fields name in the file |
| as keys and their Hive types as values. |
| Note that it must be OrderedDict so as to keep columns’ order.</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – whether to overwrite the data in table or partition</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns |
| and values</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every |
| execution</li> |
| <li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.run_cli"> |
| <code class="descname">run_cli</code><span class="sig-paren">(</span><em>hql</em>, <em>schema=None</em>, <em>verbose=True</em>, <em>hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveCliHook.run_cli"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.run_cli" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run an hql statement using the hive cli. If hive_conf is specified |
| it should be a dict and the entries will be set as key/value pairs |
| in HiveConf</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>hive_conf</strong> (<em>dict</em>) – if specified these key value pairs will be passed |
| to hive as <code class="docutils literal notranslate"><span class="pre">-hiveconf</span> <span class="pre">"key"="value"</span></code>. Note that they will be |
| passed after the <code class="docutils literal notranslate"><span class="pre">hive_cli_params</span></code> and thus will override |
| whatever values are specified in the database.</td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveCliHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">result</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">run_cli</span><span class="p">(</span><span class="s2">"USE airflow;"</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="p">(</span><span class="s2">"OK"</span> <span class="ow">in</span> <span class="n">result</span><span class="p">)</span> |
| <span class="go">True</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveCliHook.test_hql"> |
| <code class="descname">test_hql</code><span class="sig-paren">(</span><em>hql</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveCliHook.test_hql"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveCliHook.test_hql" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Test an hql statement using the hive cli and EXPLAIN</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.hive_hooks.</code><code class="descname">HiveMetastoreHook</code><span class="sig-paren">(</span><em>metastore_conn_id=u'metastore_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Wrapper to interact with the Hive Metastore</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition"> |
| <code class="descname">check_for_named_partition</code><span class="sig-paren">(</span><em>schema</em>, <em>table</em>, <em>partition_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.check_for_named_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a partition with a given name exists</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>schema</strong> (<em>string</em>) – Name of hive schema (database) @table belongs to</li> |
| <li><strong>table</strong> – Name of hive table @partition belongs to</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Partition:</th><td class="field-body"><p class="first">Name of the partitions to check for (eg <cite>a=b/c=d</cite>)</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">boolean</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_named_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds=2015-01-01"</span><span class="p">)</span> |
| <span class="go">True</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_named_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds=xxx"</span><span class="p">)</span> |
| <span class="go">False</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_partition"> |
| <code class="descname">check_for_partition</code><span class="sig-paren">(</span><em>schema</em>, <em>table</em>, <em>partition</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.check_for_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a partition exists</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>schema</strong> (<em>string</em>) – Name of hive schema (database) @table belongs to</li> |
| <li><strong>table</strong> – Name of hive table @partition belongs to</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Partition:</th><td class="field-body"><p class="first">Expression that matches the partitions to check for |
| (eg <cite>a = ‘b’ AND c = ‘d’</cite>)</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">boolean</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">check_for_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds='2015-01-01'"</span><span class="p">)</span> |
| <span class="go">True</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_databases"> |
| <code class="descname">get_databases</code><span class="sig-paren">(</span><em>pattern=u'*'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_databases"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_databases" title="Permalink to this definition">¶</a></dt> |
<dd><p>Get a list of metastore databases matching the given pattern</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_metastore_client"> |
| <code class="descname">get_metastore_client</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_metastore_client"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_metastore_client" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Hive thrift client.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_partitions"> |
| <code class="descname">get_partitions</code><span class="sig-paren">(</span><em>schema</em>, <em>table_name</em>, <em>filter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_partitions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_partitions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of all partitions in a table. Works only |
| for tables with less than 32767 (java short max val). |
| For subpartitioned table, the number might easily exceed this.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">parts</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_partitions</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">t</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">parts</span><span class="p">)</span> |
| <span class="go">1</span> |
| <span class="gp">>>> </span><span class="n">parts</span> |
| <span class="go">[{'ds': '2015-01-01'}]</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_table"> |
| <code class="descname">get_table</code><span class="sig-paren">(</span><em>table_name</em>, <em>db=u'default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_table"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_table" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a metastore table object</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_table</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">'static_babynames'</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">t</span><span class="o">.</span><span class="n">tableName</span> |
| <span class="go">'static_babynames'</span> |
| <span class="gp">>>> </span><span class="p">[</span><span class="n">col</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">sd</span><span class="o">.</span><span class="n">cols</span><span class="p">]</span> |
| <span class="go">['state', 'year', 'name', 'gender', 'num']</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.get_tables"> |
| <code class="descname">get_tables</code><span class="sig-paren">(</span><em>db</em>, <em>pattern=u'*'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.get_tables"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.get_tables" title="Permalink to this definition">¶</a></dt> |
<dd><p>Get a list of metastore tables in the given database matching the pattern</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.max_partition"> |
| <code class="descname">max_partition</code><span class="sig-paren">(</span><em>schema</em>, <em>table_name</em>, <em>field=None</em>, <em>filter_map=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.max_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.max_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the maximum value for all partitions with given field in a table. |
| If only one partition key exist in the table, the key will be used as field. |
| filter_map should be a partition_key:partition_value map and will be used to |
| filter out partitions.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>schema</strong> (<em>string</em>) – schema name.</li> |
| <li><strong>table_name</strong> (<em>string</em>) – table name.</li> |
| <li><strong>field</strong> (<em>string</em>) – partition key to get max partition from.</li> |
| <li><strong>filter_map</strong> (<em>map</em>) – partition_key:partition_value map used for partition filtering.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
<span class="gp">>>> </span><span class="n">filter_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'ds'</span><span class="p">:</span> <span class="s1">'2015-01-01'</span><span class="p">}</span>
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
<span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">max_partition</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span>
<span class="gp">... </span><span class="n">table_name</span><span class="o">=</span><span class="n">t</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="s1">'ds'</span><span class="p">,</span> <span class="n">filter_map</span><span class="o">=</span><span class="n">filter_map</span><span class="p">)</span>
| <span class="go">'2015-01-01'</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveMetastoreHook.table_exists"> |
| <code class="descname">table_exists</code><span class="sig-paren">(</span><em>table_name</em>, <em>db=u'default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveMetastoreHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveMetastoreHook.table_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if table exists</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">'static_babynames'</span><span class="p">)</span> |
| <span class="go">True</span> |
| <span class="gp">>>> </span><span class="n">hh</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="n">db</span><span class="o">=</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="s1">'does_not_exist'</span><span class="p">)</span> |
| <span class="go">False</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.hive_hooks.</code><code class="descname">HiveServer2Hook</code><span class="sig-paren">(</span><em>hiveserver2_conn_id=u'hiveserver2_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Wrapper around the pyhive library</p> |
| <p>Note that the default authMechanism is PLAIN, to override it you |
can specify it in the <code class="docutils literal notranslate"><span class="pre">extra</span></code> of your connection in the UI.</p>
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>hql</em>, <em>schema=u'default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a pandas dataframe from a Hive query</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveServer2Hook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT * FROM airflow.static_babynames LIMIT 100"</span> |
| <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_pandas_df</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="p">)</span> |
| <span class="go">100</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_records"> |
| <code class="descname">get_records</code><span class="sig-paren">(</span><em>hql</em>, <em>schema=u'default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_records" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a set of records from a Hive query.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hh</span> <span class="o">=</span> <span class="n">HiveServer2Hook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT * FROM airflow.static_babynames LIMIT 100"</span> |
| <span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">hh</span><span class="o">.</span><span class="n">get_records</span><span class="p">(</span><span class="n">sql</span><span class="p">))</span> |
| <span class="go">100</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.get_results"> |
| <code class="descname">get_results</code><span class="sig-paren">(</span><em>hql</em>, <em>schema=u'default'</em>, <em>fetch_size=None</em>, <em>hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.get_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.get_results" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get results of the provided hql in target schema. |
| :param hql: hql to be executed. |
| :param schema: target schema, default to ‘default’. |
| :param fetch_size: max size of result to fetch. |
| :param hive_conf: hive_conf to execute along with the hql. |
| :return: results of hql execution.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.hive_hooks.HiveServer2Hook.to_csv"> |
| <code class="descname">to_csv</code><span class="sig-paren">(</span><em>hql</em>, <em>csv_filepath</em>, <em>schema=u'default'</em>, <em>delimiter=u'</em>, <em>'</em>, <em>lineterminator=u'\r\n'</em>, <em>output_header=True</em>, <em>fetch_size=1000</em>, <em>hive_conf=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#HiveServer2Hook.to_csv"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.HiveServer2Hook.to_csv" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute hql in target schema and write results to a csv file. |
| :param hql: hql to be executed. |
| :param csv_filepath: filepath of csv to write results into. |
| :param schema: target schema, default to ‘default’. |
| :param delimiter: delimiter of the csv file. |
| :param lineterminator: lineterminator of the csv file. |
| :param output_header: header of the csv file. |
| :param fetch_size: number of result rows to write into the csv file. |
| :param hive_conf: hive_conf to execute along with the hql. |
| :return:</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="function"> |
| <dt id="airflow.hooks.hive_hooks.get_context_from_env_var"> |
| <code class="descclassname">airflow.hooks.hive_hooks.</code><code class="descname">get_context_from_env_var</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hive_hooks.html#get_context_from_env_var"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hive_hooks.get_context_from_env_var" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Extract context from env variable, e.g. dag_id, task_id and execution_date, |
| so that they can be used inside BashOperator and PythonOperator. |
| :return: The context of interest.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.http_hook.HttpHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.http_hook.</code><code class="descname">HttpHook</code><span class="sig-paren">(</span><em>method='POST'</em>, <em>http_conn_id='http_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/http_hook.html#HttpHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.http_hook.HttpHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interact with HTTP servers. |
| :param http_conn_id: connection that has the base API url i.e <a class="reference external" href="https://www.google.com/">https://www.google.com/</a></p> |
| <blockquote> |
| <div>and optional authentication credentials. Default headers can also be specified in |
| the Extra field in json format.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>method</strong> (<em>str</em>) – the API method to be called</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.hooks.http_hook.HttpHook.check_response"> |
| <code class="descname">check_response</code><span class="sig-paren">(</span><em>response</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/http_hook.html#HttpHook.check_response"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.http_hook.HttpHook.check_response" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks the status code and raise an AirflowException exception on non 2XX or 3XX |
| status codes |
| :param response: A requests response object |
| :type response: requests.response</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.http_hook.HttpHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><em>headers=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/http_hook.html#HttpHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.http_hook.HttpHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns http session for use with requests |
| :param headers: additional headers to be passed through as a dictionary |
| :type headers: dict</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.http_hook.HttpHook.run"> |
| <code class="descname">run</code><span class="sig-paren">(</span><em>endpoint</em>, <em>data=None</em>, <em>headers=None</em>, <em>extra_options=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/http_hook.html#HttpHook.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.http_hook.HttpHook.run" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Performs the request |
| :param endpoint: the endpoint to be called i.e. resource/v1/query? |
| :type endpoint: str |
| :param data: payload to be uploaded or request parameters |
| :type data: dict |
| :param headers: additional headers to be passed through as a dictionary |
| :type headers: dict |
| :param extra_options: additional options to be used when executing the request</p> |
| <blockquote> |
| <div>i.e. {‘check_response’: False} to avoid checking for and raising exceptions on non |
| 2XX or 3XX status codes</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.http_hook.HttpHook.run_and_check"> |
| <code class="descname">run_and_check</code><span class="sig-paren">(</span><em>session</em>, <em>prepped_request</em>, <em>extra_options</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/http_hook.html#HttpHook.run_and_check"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.http_hook.HttpHook.run_and_check" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Grabs extra options like timeout and actually runs the request, |
| checking for the result |
| :param session: the session to be used to execute the request |
| :type session: requests.Session |
| :param prepped_request: the prepared request generated in run() |
| :type prepped_request: session.prepare_request |
| :param extra_options: additional options to be used when executing the request</p> |
| <blockquote> |
| <div>i.e. {‘check_response’: False} to avoid checking for and raising exceptions on non 2XX |
| or 3XX status codes</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.http_hook.HttpHook.run_with_advanced_retry"> |
| <code class="descname">run_with_advanced_retry</code><span class="sig-paren">(</span><em>_retry_args</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/http_hook.html#HttpHook.run_with_advanced_retry"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.http_hook.HttpHook.run_with_advanced_retry" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Runs Hook.run() with a Tenacity decorator attached to it. This is useful for |
| connectors which might be disturbed by intermittent issues and should not |
| instantly fail. |
| :param _retry_args: Arguments which define the retry behaviour.</p> |
| <blockquote> |
| <div>See Tenacity documentation at <a class="reference external" href="https://github.com/jd/tenacity">https://github.com/jd/tenacity</a></div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Example: ::</dt> |
| <dd><p class="first">hook = HttpHook(http_conn_id=’my_conn’,method=’GET’) |
| retry_args = dict(</p> |
| <blockquote class="last"> |
| <div><blockquote> |
| <div>wait=tenacity.wait_exponential(), |
| stop=tenacity.stop_after_attempt(10), |
| retry=requests.exceptions.ConnectionError</div></blockquote> |
| <p>) |
| hook.run_with_advanced_retry(</p> |
| <blockquote> |
| <div><blockquote> |
| <div>endpoint=’v1/test’, |
| _retry_args=retry_args</div></blockquote> |
| <p>)</p> |
| </div></blockquote> |
| </div></blockquote> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.druid_hook.DruidDbApiHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.druid_hook.</code><code class="descname">DruidDbApiHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidDbApiHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidDbApiHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with Druid broker</p> |
| <p>This hook is purely for users to query druid broker. |
| For ingestion, please use druidHook.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.druid_hook.DruidDbApiHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidDbApiHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidDbApiHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Establish a connection to druid broker.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.druid_hook.DruidDbApiHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidDbApiHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidDbApiHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns a pandas dataframe</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.druid_hook.DruidDbApiHook.get_uri"> |
| <code class="descname">get_uri</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidDbApiHook.get_uri"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidDbApiHook.get_uri" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the connection uri for druid broker.</p> |
| <p>e.g: druid://localhost:8082/druid/v2/sql/</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.druid_hook.DruidDbApiHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidDbApiHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidDbApiHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A generic way to insert a set of tuples into a table, |
| a new transaction is created every commit_every rows</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – Name of the target table</li> |
| <li><strong>rows</strong> (<em>iterable of tuples</em>) – The rows to insert into the table</li> |
| <li><strong>target_fields</strong> (<em>iterable of strings</em>) – The names of the columns to fill in the table</li> |
| <li><strong>commit_every</strong> (<em>int</em>) – The maximum number of rows to insert in one |
| transaction. Set to 0 to insert all rows in one transaction.</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Whether to replace instead of insert</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.druid_hook.DruidDbApiHook.set_autocommit"> |
| <code class="descname">set_autocommit</code><span class="sig-paren">(</span><em>conn</em>, <em>autocommit</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidDbApiHook.set_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidDbApiHook.set_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the autocommit flag on the connection</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.druid_hook.DruidHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.druid_hook.</code><code class="descname">DruidHook</code><span class="sig-paren">(</span><em>druid_ingest_conn_id='druid_ingest_default'</em>, <em>timeout=1</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/druid_hook.html#DruidHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.druid_hook.DruidHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Connection to Druid overlord for ingestion</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>druid_ingest_conn_id</strong> (<em>string</em>) – The connection id to the Druid overlord machine |
| which accepts index jobs</li> |
| <li><strong>timeout</strong> (<em>int</em>) – The interval between polling |
| the Druid job for the status of the ingestion job. |
| Must be greater than or equal to 1</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – The maximum ingestion time before assuming the job failed</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.hdfs_hook.HDFSHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.hdfs_hook.</code><code class="descname">HDFSHook</code><span class="sig-paren">(</span><em>hdfs_conn_id='hdfs_default'</em>, <em>proxy_user=None</em>, <em>autoconfig=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hdfs_hook.html#HDFSHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hdfs_hook.HDFSHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interact with HDFS. This class is a wrapper around the snakebite library.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>hdfs_conn_id</strong> – Connection id to fetch connection info</li> |
| <li><strong>proxy_user</strong> (<em>string</em>) – effective user for HDFS operations</li> |
| <li><strong>autoconfig</strong> (<em>bool</em>) – use snakebite’s automatically configured client</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.hooks.hdfs_hook.HDFSHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/hdfs_hook.html#HDFSHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.hdfs_hook.HDFSHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a snakebite HDFSClient object.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.mssql_hook.MsSqlHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.mssql_hook.</code><code class="descname">MsSqlHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mssql_hook.html#MsSqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mssql_hook.MsSqlHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with Microsoft SQL Server.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.mssql_hook.MsSqlHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mssql_hook.html#MsSqlHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mssql_hook.MsSqlHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a mssql connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.mssql_hook.MsSqlHook.set_autocommit"> |
| <code class="descname">set_autocommit</code><span class="sig-paren">(</span><em>conn</em>, <em>autocommit</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mssql_hook.html#MsSqlHook.set_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mssql_hook.MsSqlHook.set_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the autocommit flag on the connection</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.mysql_hook.MySqlHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.mysql_hook.</code><code class="descname">MySqlHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mysql_hook.html#MySqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mysql_hook.MySqlHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with MySQL.</p> |
| <p>You can specify charset in the extra field of your connection |
| as <code class="docutils literal notranslate"><span class="pre">{"charset":</span> <span class="pre">"utf8"}</span></code>. Also you can choose cursor as |
| <code class="docutils literal notranslate"><span class="pre">{"cursor":</span> <span class="pre">"SSCursor"}</span></code>. Refer to the MySQLdb.cursors for more details.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.mysql_hook.MySqlHook.bulk_dump"> |
| <code class="descname">bulk_dump</code><span class="sig-paren">(</span><em>table</em>, <em>tmp_file</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mysql_hook.html#MySqlHook.bulk_dump"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mysql_hook.MySqlHook.bulk_dump" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Dumps a database table into a tab-delimited file</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.mysql_hook.MySqlHook.bulk_load"> |
| <code class="descname">bulk_load</code><span class="sig-paren">(</span><em>table</em>, <em>tmp_file</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mysql_hook.html#MySqlHook.bulk_load"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mysql_hook.MySqlHook.bulk_load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a tab-delimited file into a database table</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.mysql_hook.MySqlHook.get_autocommit"> |
| <code class="descname">get_autocommit</code><span class="sig-paren">(</span><em>conn</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mysql_hook.html#MySqlHook.get_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mysql_hook.MySqlHook.get_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>MySql connection gets autocommit in a different way. |
| :param conn: connection to get autocommit setting from. |
| :type conn: connection object. |
| :return: connection autocommit setting |
| :rtype bool</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.mysql_hook.MySqlHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mysql_hook.html#MySqlHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mysql_hook.MySqlHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a mysql connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.mysql_hook.MySqlHook.set_autocommit"> |
| <code class="descname">set_autocommit</code><span class="sig-paren">(</span><em>conn</em>, <em>autocommit</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/mysql_hook.html#MySqlHook.set_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.mysql_hook.MySqlHook.set_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>MySql connection sets autocommit in a different way.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.pig_hook.PigCliHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.pig_hook.</code><code class="descname">PigCliHook</code><span class="sig-paren">(</span><em>pig_cli_conn_id='pig_cli_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/pig_hook.html#PigCliHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.pig_hook.PigCliHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Simple wrapper around the pig CLI.</p> |
| <p>Note that you can also set default pig CLI properties using the |
| <code class="docutils literal notranslate"><span class="pre">pig_properties</span></code> to be used in your connection as in |
| <code class="docutils literal notranslate"><span class="pre">{"pig_properties":</span> <span class="pre">"-Dpig.tmpfilecompression=true"}</span></code></p> |
| <dl class="method"> |
| <dt id="airflow.hooks.pig_hook.PigCliHook.run_cli"> |
| <code class="descname">run_cli</code><span class="sig-paren">(</span><em>pig</em>, <em>verbose=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/pig_hook.html#PigCliHook.run_cli"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.pig_hook.PigCliHook.run_cli" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run a pig script using the pig cli</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ph</span> <span class="o">=</span> <span class="n">PigCliHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">result</span> <span class="o">=</span> <span class="n">ph</span><span class="o">.</span><span class="n">run_cli</span><span class="p">(</span><span class="s2">"ls /;"</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="p">(</span><span class="s2">"hdfs://"</span> <span class="ow">in</span> <span class="n">result</span><span class="p">)</span> |
| <span class="go">True</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.postgres_hook.PostgresHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.postgres_hook.</code><code class="descname">PostgresHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/postgres_hook.html#PostgresHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.postgres_hook.PostgresHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with Postgres. |
| You can specify ssl parameters in the extra field of your connection |
| as <code class="docutils literal notranslate"><span class="pre">{"sslmode":</span> <span class="pre">"require",</span> <span class="pre">"sslcert":</span> <span class="pre">"/path/to/cert.pem",</span> <span class="pre">etc}</span></code>.</p> |
| <p>Note: For Redshift, use keepalives_idle in the extra connection parameters |
| and set it to less than 300 seconds.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.postgres_hook.PostgresHook.bulk_dump"> |
| <code class="descname">bulk_dump</code><span class="sig-paren">(</span><em>table</em>, <em>tmp_file</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/postgres_hook.html#PostgresHook.bulk_dump"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.postgres_hook.PostgresHook.bulk_dump" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Dumps a database table into a tab-delimited file</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.postgres_hook.PostgresHook.bulk_load"> |
| <code class="descname">bulk_load</code><span class="sig-paren">(</span><em>table</em>, <em>tmp_file</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/postgres_hook.html#PostgresHook.bulk_load"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.postgres_hook.PostgresHook.bulk_load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a tab-delimited file into a database table</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.postgres_hook.PostgresHook.copy_expert"> |
| <code class="descname">copy_expert</code><span class="sig-paren">(</span><em>sql</em>, <em>filename</em>, <em>open=<built-in function open></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/postgres_hook.html#PostgresHook.copy_expert"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.postgres_hook.PostgresHook.copy_expert" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes SQL using psycopg2 copy_expert method. |
| Necessary to execute COPY command without access to a superuser.</p> |
| <p>Note: if this method is called with a “COPY FROM” statement and |
| the specified input file does not exist, it creates an empty |
| file and no data is loaded, but the operation succeeds. |
| So if users want to be aware when the input file does not exist, |
| they have to check its existence by themselves.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.postgres_hook.PostgresHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/postgres_hook.html#PostgresHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.postgres_hook.PostgresHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a connection object</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.presto_hook.</code><code class="descname">PrestoHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with Presto through PyHive!</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ph</span> <span class="o">=</span> <span class="n">PrestoHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">sql</span> <span class="o">=</span> <span class="s2">"SELECT count(1) AS num FROM airflow.static_babynames"</span> |
| <span class="gp">>>> </span><span class="n">ph</span><span class="o">.</span><span class="n">get_records</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span> |
| <span class="go">[[340698]]</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook.get_first"> |
| <code class="descname">get_first</code><span class="sig-paren">(</span><em>hql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook.get_first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook.get_first" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns only the first row, regardless of how many rows the query |
| returns.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>hql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a pandas dataframe from a sql query.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook.get_records"> |
| <code class="descname">get_records</code><span class="sig-paren">(</span><em>hql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook.get_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook.get_records" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a set of records from Presto</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A generic way to insert a set of tuples into a table.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – Name of the target table</li> |
| <li><strong>rows</strong> (<em>iterable of tuples</em>) – The rows to insert into the table</li> |
| <li><strong>target_fields</strong> (<em>iterable of strings</em>) – The names of the columns to fill in the table</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.presto_hook.PrestoHook.run"> |
| <code class="descname">run</code><span class="sig-paren">(</span><em>hql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/presto_hook.html#PrestoHook.run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.presto_hook.PrestoHook.run" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the statement against Presto. Can be used to create views.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.S3_hook.S3Hook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.S3_hook.</code><code class="descname">S3Hook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS S3, using the boto3 library.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_bucket"> |
| <code class="descname">check_for_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if bucket_name exists.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_key"> |
| <code class="descname">check_for_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a key exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_prefix"> |
| <code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix</em>, <em>delimiter</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks that a prefix exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key"> |
| <code class="descname">check_for_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks that a key matching a wildcard expression exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.copy_object"> |
| <code class="descname">copy_object</code><span class="sig-paren">(</span><em>source_bucket_key</em>, <em>dest_bucket_key</em>, <em>source_bucket_name=None</em>, <em>dest_bucket_name=None</em>, <em>source_version_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.copy_object"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.copy_object" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a copy of an object that is already stored in S3.</p> |
| <p>Note: the S3 connection used here needs to have access to both |
| source and destination bucket/key.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket_key</strong> (<em>str</em>) – <p>The key of the source object.</p> |
| <p>It can be either full s3:// style url or relative path from root level.</p> |
| <p>When it’s specified as a full s3:// url, please omit source_bucket_name.</p> |
| </li> |
| <li><strong>dest_bucket_key</strong> (<em>str</em>) – <p>The key of the object to copy to.</p> |
| <p>The convention to specify <cite>dest_bucket_key</cite> is the same |
| as <cite>source_bucket_key</cite>.</p> |
| </li> |
| <li><strong>source_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket where the source object is in.</p> |
| <p>It should be omitted when <cite>source_bucket_key</cite> is provided as a full s3:// url.</p> |
| </li> |
| <li><strong>dest_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket to where the object is copied.</p> |
| <p>It should be omitted when <cite>dest_bucket_key</cite> is provided as a full s3:// url.</p> |
| </li> |
| <li><strong>source_version_id</strong> (<em>str</em>) – Version ID of the source object (OPTIONAL)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.create_bucket"> |
| <code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.create_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates an Amazon S3 bucket.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – The name of the bucket</li> |
| <li><strong>region_name</strong> (<em>str</em>) – The name of the aws region in which to create the bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.delete_objects"> |
| <code class="descname">delete_objects</code><span class="sig-paren">(</span><em>bucket</em>, <em>keys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.delete_objects"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.delete_objects" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of the bucket in which you are going to delete object(s)</li> |
| <li><strong>keys</strong> (<em>str</em><em> or </em><em>list</em>) – <p>The key(s) to delete from S3 bucket.</p> |
| <p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a string, it’s supposed to be the key name of |
| the single object to delete.</p> |
| <p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a list, it’s supposed to be the list of the |
| keys to delete.</p> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_bucket"> |
| <code class="descname">get_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.S3.Bucket object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_key"> |
| <code class="descname">get_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.s3.Object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_wildcard_key"> |
| <code class="descname">get_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_wildcard_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.s3.Object object matching the wildcard expression</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.list_keys"> |
| <code class="descname">list_keys</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_keys" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists keys in a bucket under prefix and not containing delimiter</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.list_prefixes"> |
| <code class="descname">list_prefixes</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_prefixes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_prefixes" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists prefixes in a bucket under prefix</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_bytes"> |
| <code class="descname">load_bytes</code><span class="sig-paren">(</span><em>bytes_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_bytes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_bytes" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads bytes to S3</p> |
| <p>This is provided as a convenience to drop a string in S3. It uses the |
| boto infrastructure to ship a file to s3.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bytes_data</strong> (<em>bytes</em>) – bytes to set as content for the key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>filename</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a local file to S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>filename</strong> (<em>str</em>) – name of the file to load.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists. If replace is False and the key exists, an |
| error will be raised.</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_file_obj"> |
| <code class="descname">load_file_obj</code><span class="sig-paren">(</span><em>file_obj</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file_obj"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file_obj" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a file object to S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_obj</strong> (<em>file-like object</em>) – The file-like object to set as the content for the S3 key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag that indicates whether to overwrite the key |
| if it already exists.</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, S3 encrypts the file on the server, |
| and the file is stored in encrypted form at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em>, <em>encoding='utf-8'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a string to S3</p> |
| <p>This is provided as a convenience to drop a string in S3. It uses the |
| boto infrastructure to ship a file to s3.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – string to set as content for the key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.read_key"> |
| <code class="descname">read_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.read_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.read_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads a key from S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.select_key"> |
| <code class="descname">select_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em>, <em>expression='SELECT * FROM S3Object'</em>, <em>expression_type='SQL'</em>, <em>input_serialization=None</em>, <em>output_serialization=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.select_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.select_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads a key with S3 Select.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| <li><strong>expression</strong> (<em>str</em>) – S3 Select expression</li> |
| <li><strong>expression_type</strong> (<em>str</em>) – S3 Select expression type</li> |
| <li><strong>input_serialization</strong> (<em>dict</em>) – S3 Select input data serialization format</li> |
| <li><strong>output_serialization</strong> (<em>dict</em>) – S3 Select output data serialization format</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">retrieved subset of original data by S3 Select</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about S3 Select parameters: |
| <a class="reference external" href="https://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content">https://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content</a></p> |
| </div> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.samba_hook.SambaHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.samba_hook.</code><code class="descname">SambaHook</code><span class="sig-paren">(</span><em>samba_conn_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/samba_hook.html#SambaHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.samba_hook.SambaHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
<p>Allows for interaction with a Samba server.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.slack_hook.SlackHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.slack_hook.</code><code class="descname">SlackHook</code><span class="sig-paren">(</span><em>token=None</em>, <em>slack_conn_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/slack_hook.html#SlackHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.slack_hook.SlackHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interact with Slack, using slackclient library.</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.sqlite_hook.SqliteHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.sqlite_hook.</code><code class="descname">SqliteHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/sqlite_hook.html#SqliteHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.sqlite_hook.SqliteHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with SQLite.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.sqlite_hook.SqliteHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/sqlite_hook.html#SqliteHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.sqlite_hook.SqliteHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a sqlite connection object</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.hooks.zendesk_hook.ZendeskHook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.zendesk_hook.</code><code class="descname">ZendeskHook</code><span class="sig-paren">(</span><em>zendesk_conn_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/zendesk_hook.html#ZendeskHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.zendesk_hook.ZendeskHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>A hook to talk to Zendesk</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.zendesk_hook.ZendeskHook.call"> |
| <code class="descname">call</code><span class="sig-paren">(</span><em>path</em>, <em>query=None</em>, <em>get_all_pages=True</em>, <em>side_loading=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/zendesk_hook.html#ZendeskHook.call"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.zendesk_hook.ZendeskHook.call" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Call Zendesk API and return results</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>path</strong> – The Zendesk API to call</li> |
| <li><strong>query</strong> – Query parameters</li> |
| <li><strong>get_all_pages</strong> – Accumulate results over all pages before |
| returning. Due to strict rate limiting, this can often timeout. |
| Waits for recommended period between tries after a timeout.</li> |
| <li><strong>side_loading</strong> – Retrieve related records as part of a single |
| request. In order to enable side-loading, add an ‘include’ |
| query parameter containing a comma-separated list of resources |
| to load. For more information on side-loading see |
| <a class="reference external" href="https://developer.zendesk.com/rest_api/docs/core/side_loading">https://developer.zendesk.com/rest_api/docs/core/side_loading</a></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <div class="section" id="community-contributed-hooks"> |
| <h3>Community contributed hooks<a class="headerlink" href="#community-contributed-hooks" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.aws_athena_hook.</code><code class="descname">AWSAthenaHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>sleep_time=30</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS Athena to run, poll queries and return query results</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use.</li> |
<li><strong>sleep_time</strong> (<em>int</em>) – Time to wait between two consecutive calls to check query status on Athena</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.check_query_status"> |
| <code class="descname">check_query_status</code><span class="sig-paren">(</span><em>query_execution_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook.check_query_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.check_query_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fetch the status of submitted athena query. Returns None or one of valid query states.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>query_execution_id</strong> (<em>str</em>) – Id of submitted athena query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">str</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.get_conn" title="Permalink to this definition">¶</a></dt> |
<dd><p>Check if an AWS connection exists already, or create one, and return it</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">boto3 session</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.get_query_results"> |
| <code class="descname">get_query_results</code><span class="sig-paren">(</span><em>query_execution_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook.get_query_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.get_query_results" title="Permalink to this definition">¶</a></dt> |
<dd><p>Fetch submitted Athena query results. Returns None if the query is in an intermediate |
or failed/cancelled state, else a dict of the query output</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>query_execution_id</strong> (<em>str</em>) – Id of submitted athena query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.poll_query_status"> |
| <code class="descname">poll_query_status</code><span class="sig-paren">(</span><em>query_execution_id</em>, <em>max_tries=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook.poll_query_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.poll_query_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Poll the status of submitted athena query until query state reaches final state. |
| Returns one of the final states</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query_execution_id</strong> (<em>str</em>) – Id of submitted athena query</li> |
| <li><strong>max_tries</strong> (<em>int</em>) – Number of times to poll for query state before function exits</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.run_query"> |
| <code class="descname">run_query</code><span class="sig-paren">(</span><em>query</em>, <em>query_context</em>, <em>result_configuration</em>, <em>client_request_token=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.run_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run Presto query on athena with provided config and return submitted query_execution_id</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>str</em>) – Presto query to run</li> |
| <li><strong>query_context</strong> (<em>dict</em>) – Context in which query need to be run</li> |
| <li><strong>result_configuration</strong> (<em>dict</em>) – Dict with path to store results in and config related to encryption</li> |
| <li><strong>client_request_token</strong> (<em>str</em>) – Unique token created by user to avoid multiple executions of same query</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.stop_query"> |
| <code class="descname">stop_query</code><span class="sig-paren">(</span><em>query_execution_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_athena_hook.html#AWSAthenaHook.stop_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_athena_hook.AWSAthenaHook.stop_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Cancel the submitted athena query</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>query_execution_id</strong> (<em>str</em>) – Id of submitted athena query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.aws_dynamodb_hook.AwsDynamoDBHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.aws_dynamodb_hook.</code><code class="descname">AwsDynamoDBHook</code><span class="sig-paren">(</span><em>table_keys=None</em>, <em>table_name=None</em>, <em>region_name=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_dynamodb_hook.html#AwsDynamoDBHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_dynamodb_hook.AwsDynamoDBHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS DynamoDB.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table_keys</strong> (<em>list</em>) – partition key and sort key</li> |
| <li><strong>table_name</strong> (<em>str</em>) – target DynamoDB table</li> |
| <li><strong>region_name</strong> (<em>str</em>) – aws region name (example: us-east-1)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_dynamodb_hook.AwsDynamoDBHook.write_batch_data"> |
| <code class="descname">write_batch_data</code><span class="sig-paren">(</span><em>items</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_dynamodb_hook.html#AwsDynamoDBHook.write_batch_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_dynamodb_hook.AwsDynamoDBHook.write_batch_data" title="Permalink to this definition">¶</a></dt> |
<dd><p>Write batch items to the DynamoDB table with provisioned throughput capacity.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.aws_firehose_hook.AwsFirehoseHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.aws_firehose_hook.</code><code class="descname">AwsFirehoseHook</code><span class="sig-paren">(</span><em>delivery_stream</em>, <em>region_name=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_firehose_hook.html#AwsFirehoseHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_firehose_hook.AwsFirehoseHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
<p>Interact with AWS Kinesis Firehose.</p> |
<table class="docutils field-list" frame="void" rules="none"> |
<col class="field-name" /> |
<col class="field-body" /> |
<tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>delivery_stream</strong> (<em>str</em>) – Name of the delivery stream</li> |
<li><strong>region_name</strong> (<em>str</em>) – AWS region name (example: us-east-1)</li> |
</ul> |
</td> |
</tr> |
</tbody> |
</table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_firehose_hook.AwsFirehoseHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_firehose_hook.html#AwsFirehoseHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_firehose_hook.AwsFirehoseHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns AwsHook connection object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_firehose_hook.AwsFirehoseHook.put_records"> |
| <code class="descname">put_records</code><span class="sig-paren">(</span><em>records</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_firehose_hook.html#AwsFirehoseHook.put_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_firehose_hook.AwsFirehoseHook.put_records" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Write batch records to Kinesis Firehose</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.aws_glue_catalog_hook.</code><code class="descname">AwsGlueCatalogHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>region_name=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_glue_catalog_hook.html#AwsGlueCatalogHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS Glue Catalog</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – ID of the Airflow connection where |
| credentials and extra configuration are stored</li> |
| <li><strong>region_name</strong> (<em>str</em>) – aws region name (example: us-east-1)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook.check_for_partition"> |
| <code class="descname">check_for_partition</code><span class="sig-paren">(</span><em>database_name</em>, <em>table_name</em>, <em>expression</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_glue_catalog_hook.html#AwsGlueCatalogHook.check_for_partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook.check_for_partition" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a partition exists</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>database_name</strong> (<em>str</em>) – Name of hive database (schema) @table belongs to</li> |
| <li><strong>table_name</strong> (<em>str</em>) – Name of hive table @partition belongs to</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Expression:</th><td class="field-body"><p class="first">Expression that matches the partitions to check for |
| (eg <cite>a = ‘b’ AND c = ‘d’</cite>)</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">hook</span> <span class="o">=</span> <span class="n">AwsGlueCatalogHook</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">t</span> <span class="o">=</span> <span class="s1">'static_babynames_partitioned'</span> |
| <span class="gp">>>> </span><span class="n">hook</span><span class="o">.</span><span class="n">check_for_partition</span><span class="p">(</span><span class="s1">'airflow'</span><span class="p">,</span> <span class="n">t</span><span class="p">,</span> <span class="s2">"ds='2015-01-01'"</span><span class="p">)</span> |
| <span class="go">True</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_glue_catalog_hook.html#AwsGlueCatalogHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns glue connection object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook.get_partitions"> |
| <code class="descname">get_partitions</code><span class="sig-paren">(</span><em>database_name</em>, <em>table_name</em>, <em>expression=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_glue_catalog_hook.html#AwsGlueCatalogHook.get_partitions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_glue_catalog_hook.AwsGlueCatalogHook.get_partitions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves the partition values for a table.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>database_name</strong> (<em>str</em>) – The name of the catalog database where the partitions reside.</li> |
| <li><strong>table_name</strong> (<em>str</em>) – The name of the partitions’ table.</li> |
| <li><strong>expression</strong> (<em>str</em>) – An expression filtering the partitions to be returned. |
| Please see official AWS documentation for further information. |
| <a class="reference external" href="https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetPartitions">https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetPartitions</a></li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">set of partition values where each value is a tuple since |
| a partition may be composed of multiple columns. For example:</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>{(‘2018-01-01’,‘1’), (‘2018-01-01’,‘2’)}</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.aws_hook.AwsHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.aws_hook.</code><code class="descname">AwsHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_hook.html#AwsHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interact with AWS. |
| This class is a thin wrapper around the boto3 python library.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_hook.AwsHook.expand_role"> |
| <code class="descname">expand_role</code><span class="sig-paren">(</span><em>role</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_hook.html#AwsHook.expand_role"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_hook.AwsHook.expand_role" title="Permalink to this definition">¶</a></dt> |
| <dd><p>If the IAM role is a role name, get the Amazon Resource Name (ARN) for the role. |
| If IAM role is already an IAM role ARN, no change is made.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>role</strong> – IAM role name or ARN</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">IAM role ARN</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_hook.AwsHook.get_credentials"> |
| <code class="descname">get_credentials</code><span class="sig-paren">(</span><em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_hook.html#AwsHook.get_credentials"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_hook.AwsHook.get_credentials" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the underlying <cite>botocore.Credentials</cite> object.</p> |
| <p>This contains the following authentication attributes: access_key, secret_key and token.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_hook.AwsHook.get_session"> |
| <code class="descname">get_session</code><span class="sig-paren">(</span><em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_hook.html#AwsHook.get_session"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_hook.AwsHook.get_session" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the underlying boto3.session.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.aws_lambda_hook.AwsLambdaHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.aws_lambda_hook.</code><code class="descname">AwsLambdaHook</code><span class="sig-paren">(</span><em>function_name</em>, <em>region_name=None</em>, <em>log_type='None'</em>, <em>qualifier='$LATEST'</em>, <em>invocation_type='RequestResponse'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_lambda_hook.html#AwsLambdaHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_lambda_hook.AwsLambdaHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS Lambda</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>function_name</strong> (<em>str</em>) – AWS Lambda Function Name</li> |
| <li><strong>region_name</strong> (<em>str</em>) – AWS Region Name (example: us-west-2)</li> |
| <li><strong>log_type</strong> (<em>str</em>) – Tail Invocation Request</li> |
| <li><strong>qualifier</strong> (<em>str</em>) – AWS Lambda Function Version or Alias Name</li> |
| <li><strong>invocation_type</strong> (<em>str</em>) – AWS Lambda Invocation Type (RequestResponse, Event etc)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.aws_lambda_hook.AwsLambdaHook.invoke_lambda"> |
| <code class="descname">invoke_lambda</code><span class="sig-paren">(</span><em>payload</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/aws_lambda_hook.html#AwsLambdaHook.invoke_lambda"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.aws_lambda_hook.AwsLambdaHook.invoke_lambda" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Invoke Lambda Function</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_cosmos_hook.</code><code class="descname">AzureCosmosDBHook</code><span class="sig-paren">(</span><em>azure_cosmos_conn_id='azure_cosmos_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure CosmosDB.</p> |
<p>login should be the endpoint uri, password should be the master key.
Optionally, you can use the following extras to default these values
{“database_name”: “&lt;DATABASE_NAME&gt;”, “collection_name”: “&lt;COLLECTION_NAME&gt;”}.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_cosmos_conn_id</strong> (<em>str</em>) – Reference to the Azure CosmosDB connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_collection"> |
| <code class="descname">create_collection</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.create_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_collection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_database"> |
| <code class="descname">create_database</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new database in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_collection"> |
| <code class="descname">delete_collection</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_collection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes an existing collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_database"> |
| <code class="descname">delete_database</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes an existing database in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_document"> |
| <code class="descname">delete_document</code><span class="sig-paren">(</span><em>document_id</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_document" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete an existing document out of a collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_collection_exist"> |
| <code class="descname">does_collection_exist</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.does_collection_exist"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_collection_exist" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a collection exists in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_database_exist"> |
| <code class="descname">does_database_exist</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.does_database_exist"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_database_exist" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a database exists in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return a cosmos db client.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_document"> |
| <code class="descname">get_document</code><span class="sig-paren">(</span><em>document_id</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_document" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a document from an existing collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_documents"> |
| <code class="descname">get_documents</code><span class="sig-paren">(</span><em>sql_string</em>, <em>database_name=None</em>, <em>collection_name=None</em>, <em>partition_key=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_documents"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_documents" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a list of documents from an existing collection in the CosmosDB database via SQL query.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.insert_documents"> |
| <code class="descname">insert_documents</code><span class="sig-paren">(</span><em>documents</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.insert_documents"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.insert_documents" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Insert a list of new documents into an existing collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.upsert_document"> |
| <code class="descname">upsert_document</code><span class="sig-paren">(</span><em>document</em>, <em>database_name=None</em>, <em>collection_name=None</em>, <em>document_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.upsert_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.upsert_document" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Inserts a new document (or updates an existing one) into an existing |
| collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_data_lake_hook.</code><code class="descname">AzureDataLakeHook</code><span class="sig-paren">(</span><em>azure_data_lake_conn_id='azure_data_lake_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure Data Lake.</p> |
| <p>Client ID and client secret should be in user and password parameters. |
| Tenant and account name should be extra field as |
{“tenant”: “&lt;TENANT&gt;”, “account_name”: “&lt;ACCOUNT_NAME&gt;”}.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – Reference to the Azure Data Lake connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file"> |
| <code class="descname">check_for_file</code><span class="sig-paren">(</span><em>file_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a file exists on Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>file_path</strong> (<em>str</em>) – Path and name of the file.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if the file exists, False otherwise.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">bool</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file"> |
| <code class="descname">download_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.download_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file" title="Permalink to this definition">¶</a></dt> |
<dd><p>Download a file from Azure Data Lake.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>local_path</strong> (<em>str</em>) – local path. If downloading a single file, will write to this |
| specific file, unless it is an existing directory, in which case a file is |
| created within it. If downloading multiple files, this is the root |
| directory to write within. Will create directories as required.</li> |
| <li><strong>remote_path</strong> (<em>str</em>) – remote path/globstring to use to find remote files. |
| Recursive glob patterns using <cite>**</cite> are not supported.</li> |
| <li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories. |
| If False and remote path is a directory, will quit regardless if any files |
| would be overwritten or not. If True, only matching filenames are actually |
| overwritten.</li> |
| <li><strong>buffersize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for internal buffer. This block cannot be bigger than |
| a chunk and cannot be smaller than a block.</li> |
| <li><strong>blocksize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for a block. Within each chunk, we write a smaller |
| block for each API call. This block cannot be bigger than a chunk.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn" title="Permalink to this definition">¶</a></dt> |
<dd><p>Return an AzureDLFileSystem object.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file"> |
| <code class="descname">upload_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.upload_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>local_path</strong> (<em>str</em>) – local path. Can be single file, directory (in which case, |
| upload recursively) or glob pattern. Recursive glob patterns using <cite>**</cite> |
| are not supported.</li> |
| <li><strong>remote_path</strong> (<em>str</em>) – Remote path to upload to; if multiple files, this is the |
directory root to write within.</li>
| <li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories. |
| If False and remote path is a directory, will quit regardless if any files |
| would be overwritten or not. If True, only matching filenames are actually |
| overwritten.</li> |
| <li><strong>buffersize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for internal buffer. This block cannot be bigger than |
| a chunk and cannot be smaller than a block.</li> |
| <li><strong>blocksize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for a block. Within each chunk, we write a smaller |
| block for each API call. This block cannot be bigger than a chunk.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_fileshare_hook.</code><code class="descname">AzureFileShareHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure FileShare Storage.</p> |
| <p>Additional options passed in the ‘extra’ field of the connection will be |
| passed to the <cite>FileService()</cite> constructor.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory"> |
| <code class="descname">check_for_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a directory exists on Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the directory exists, False otherwise.</p>
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file"> |
| <code class="descname">check_for_file</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a file exists on Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the file exists, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory"> |
| <code class="descname">create_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory" title="Permalink to this definition">¶</a></dt> |
<dd><p>Create a new directory on an Azure File Share.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_directory()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the directory was created, False if it already exists.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the FileService object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file"> |
| <code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Where to store the file.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.get_file_to_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream"> |
| <code class="descname">get_file_to_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file_to_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>stream</strong> (<em>file-like object</em>) – A filehandle to store the file to.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.get_file_to_stream()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files"> |
| <code class="descname">list_directories_and_files</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.list_directories_and_files"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files" title="Permalink to this definition">¶</a></dt> |
<dd><p>Return the list of directories and files stored on an Azure File Share.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.list_directories_and_files()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A list of files and directories</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream"> |
| <code class="descname">load_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>count</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a stream to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>stream</strong> (<em>file-like</em>) – Opened file/stream to upload as the file content.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>count</strong> (<em>int</em>) – Size of the stream in bytes</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_stream()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a string to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – String to load.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.bigquery_hook.</code><code class="descname">BigQueryHook</code><span class="sig-paren">(</span><em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>use_legacy_sql=True</em>, <em>location=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a>, <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Interact with BigQuery. This hook uses the Google Cloud Platform |
| connection.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a BigQuery PEP 249 connection object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em>, <em>dialect=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Pandas DataFrame for the results produced by a BigQuery |
| query. The DbApiHook method must be overridden because Pandas |
| doesn’t support PEP 249 connections, except for SQLite. See:</p> |
| <p><a class="reference external" href="https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447">https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447</a> |
| <a class="reference external" href="https://github.com/pydata/pandas/issues/6900">https://github.com/pydata/pandas/issues/6900</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – The BigQuery SQL to execute.</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with (not |
| used, leave to override superclass method)</li> |
| <li><strong>dialect</strong> (<em>string in {'legacy'</em><em>, </em><em>'standard'}</em>) – Dialect of BigQuery SQL – legacy SQL or standard SQL |
| defaults to use <cite>self.use_legacy_sql</cite> if not specified</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service"> |
| <code class="descname">get_service</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_service"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a BigQuery service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Insertion is currently unsupported. Theoretically, you could use |
| BigQuery’s streaming API to insert rows into a table, but this hasn’t |
| been implemented.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists"> |
| <code class="descname">table_exists</code><span class="sig-paren">(</span><em>project_id</em>, <em>dataset_id</em>, <em>table_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for the existence of a table in Google BigQuery.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google cloud project in which to look for the |
| table. The connection supplied to the hook must provide access to |
| the specified project.</li> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The name of the dataset in which to look for the |
| table.</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The name of the table to check the existence of.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.cassandra_hook.CassandraHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.cassandra_hook.</code><code class="descname">CassandraHook</code><span class="sig-paren">(</span><em>cassandra_conn_id='cassandra_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cassandra_hook.html#CassandraHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cassandra_hook.CassandraHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Hook used to interact with Cassandra</p> |
| <p>Contact points can be specified as a comma-separated string in the ‘hosts’ |
| field of the connection.</p> |
| <p>Port can be specified in the port field of the connection.</p> |
| <p>If SSL is enabled in Cassandra, pass in a dict in the extra field as kwargs for |
| <code class="docutils literal notranslate"><span class="pre">ssl.wrap_socket()</span></code>. For example:</p> |
| <blockquote> |
| <div><dl class="docutils"> |
| <dt>{</dt> |
| <dd><dl class="first docutils"> |
| <dt>‘ssl_options’ <span class="classifier-delimiter">:</span> <span class="classifier">{</span></dt> |
| <dd>‘ca_certs’ : PATH_TO_CA_CERTS</dd> |
| </dl> |
| <p class="last">}</p> |
| </dd> |
| </dl> |
| <p>}</p> |
| </div></blockquote> |
| <dl class="docutils"> |
| <dt>Default load balancing policy is RoundRobinPolicy. To specify a different LB policy:</dt> |
| <dd><ul class="first last"> |
| <li><dl class="first docutils"> |
| <dt>DCAwareRoundRobinPolicy</dt> |
| <dd><dl class="first last docutils"> |
| <dt>{</dt> |
| <dd><blockquote class="first"> |
| <div><p>‘load_balancing_policy’: ‘DCAwareRoundRobinPolicy’, |
| ‘load_balancing_policy_args’: {</p> |
| <blockquote> |
| <div><p>‘local_dc’: LOCAL_DC_NAME, // optional |
| ‘used_hosts_per_remote_dc’: SOME_INT_VALUE, // optional</p> |
| </div></blockquote> |
| <p>}</p> |
| </div></blockquote> |
| <p class="last">}</p> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| </li> |
| <li><dl class="first docutils"> |
| <dt>WhiteListRoundRobinPolicy</dt> |
| <dd><dl class="first docutils"> |
| <dt>{</dt> |
| <dd><p class="first">‘load_balancing_policy’: ‘WhiteListRoundRobinPolicy’, |
| ‘load_balancing_policy_args’: {</p> |
| <blockquote> |
| <div><p>‘hosts’: [‘HOST1’, ‘HOST2’, ‘HOST3’]</p> |
| </div></blockquote> |
| <p class="last">}</p> |
| </dd> |
| </dl> |
| <p class="last">}</p> |
| </dd> |
| </dl> |
| </li> |
| <li><dl class="first docutils"> |
| <dt>TokenAwarePolicy</dt> |
| <dd><dl class="first docutils"> |
| <dt>{</dt> |
| <dd><p class="first">‘load_balancing_policy’: ‘TokenAwarePolicy’, |
| ‘load_balancing_policy_args’: {</p> |
| <blockquote> |
| <div><p>‘child_load_balancing_policy’: CHILD_POLICY_NAME, // optional |
| ‘child_load_balancing_policy_args’: { … } // optional</p> |
| </div></blockquote> |
| <p class="last">}</p> |
| </dd> |
| </dl> |
| <p class="last">}</p> |
| </dd> |
| </dl> |
| </li> |
| </ul> |
| </dd> |
| </dl> |
| <p>For details of the Cluster config, see cassandra.cluster.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.cassandra_hook.CassandraHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cassandra_hook.html#CassandraHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cassandra_hook.CassandraHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a cassandra Session object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.cassandra_hook.CassandraHook.record_exists"> |
| <code class="descname">record_exists</code><span class="sig-paren">(</span><em>table</em>, <em>keys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cassandra_hook.html#CassandraHook.record_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cassandra_hook.CassandraHook.record_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a record exists in Cassandra</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>string</em>) – Target Cassandra table. |
| Use dot notation to target a specific keyspace.</li> |
| <li><strong>keys</strong> (<em>dict</em>) – The keys and their values to check the existence.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.cassandra_hook.CassandraHook.shutdown_cluster"> |
| <code class="descname">shutdown_cluster</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cassandra_hook.html#CassandraHook.shutdown_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cassandra_hook.CassandraHook.shutdown_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Closes all sessions and connections associated with this Cluster.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.cassandra_hook.CassandraHook.table_exists"> |
| <code class="descname">table_exists</code><span class="sig-paren">(</span><em>table</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cassandra_hook.html#CassandraHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cassandra_hook.CassandraHook.table_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a table exists in Cassandra</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>table</strong> (<em>string</em>) – Target Cassandra table. |
| Use dot notation to target a specific keyspace.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.cloudant_hook.CloudantHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.cloudant_hook.</code><code class="descname">CloudantHook</code><span class="sig-paren">(</span><em>cloudant_conn_id='cloudant_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cloudant_hook.html#CloudantHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cloudant_hook.CloudantHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interact with Cloudant.</p> |
| <p>This class is a thin wrapper around the cloudant python library. See the |
| documentation <a class="reference external" href="https://github.com/cloudant-labs/cloudant-python">here</a>.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.cloudant_hook.CloudantHook.db"> |
| <code class="descname">db</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/cloudant_hook.html#CloudantHook.db"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.cloudant_hook.CloudantHook.db" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the Database object for this hook.</p> |
| <p>See the documentation for cloudant-python here |
| <a class="reference external" href="https://github.com/cloudant-labs/cloudant-python">https://github.com/cloudant-labs/cloudant-python</a>.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.databricks_hook.DatabricksHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.databricks_hook.</code><code class="descname">DatabricksHook</code><span class="sig-paren">(</span><em>databricks_conn_id='databricks_default'</em>, <em>timeout_seconds=180</em>, <em>retry_limit=3</em>, <em>retry_delay=1.0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/databricks_hook.html#DatabricksHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.databricks_hook.DatabricksHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Interact with Databricks.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.databricks_hook.DatabricksHook.run_now"> |
| <code class="descname">run_now</code><span class="sig-paren">(</span><em>json</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/databricks_hook.html#DatabricksHook.run_now"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.databricks_hook.DatabricksHook.run_now" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Utility function to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/run-now</span></code> endpoint.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>json</strong> (<em>dict</em>) – The data used in the body of the request to the <code class="docutils literal notranslate"><span class="pre">run-now</span></code> endpoint.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the run_id as a string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">string</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.databricks_hook.DatabricksHook.submit_run"> |
| <code class="descname">submit_run</code><span class="sig-paren">(</span><em>json</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/databricks_hook.html#DatabricksHook.submit_run"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.databricks_hook.DatabricksHook.submit_run" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Utility function to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>json</strong> (<em>dict</em>) – The data used in the body of the request to the <code class="docutils literal notranslate"><span class="pre">submit</span></code> endpoint.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the run_id as a string</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">string</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.datadog_hook.DatadogHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.datadog_hook.</code><code class="descname">DatadogHook</code><span class="sig-paren">(</span><em>datadog_conn_id='datadog_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datadog_hook.html#DatadogHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datadog_hook.DatadogHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Uses datadog API to send metrics of practically anything measurable, |
| so it’s possible to track # of db records inserted/deleted, records read |
| from file and many other useful metrics.</p> |
| <p>Depends on the datadog API, which has to be deployed on the same server where |
| Airflow runs.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>datadog_conn_id</strong> (<em>string</em>) – The connection to datadog, containing metadata for api keys.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datadog_hook.DatadogHook.post_event"> |
| <code class="descname">post_event</code><span class="sig-paren">(</span><em>title</em>, <em>text</em>, <em>tags=None</em>, <em>alert_type=None</em>, <em>aggregation_key=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datadog_hook.html#DatadogHook.post_event"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datadog_hook.DatadogHook.post_event" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Posts an event to datadog (processing finished, potentially alerts, other issues) |
| Think about this as a means to maintain persistence of alerts, rather than |
| alerting itself.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>title</strong> (<em>string</em>) – The title of the event</li> |
| <li><strong>text</strong> (<em>string</em>) – The body of the event (more information)</li> |
| <li><strong>tags</strong> (<em>list</em>) – List of string tags to apply to the event</li> |
| <li><strong>alert_type</strong> (<em>string</em>) – The alert type for the event, one of |
| [“error”, “warning”, “info”, “success”]</li> |
| <li><strong>aggregation_key</strong> (<em>string</em>) – Key that can be used to aggregate this event in a stream</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datadog_hook.DatadogHook.query_metric"> |
| <code class="descname">query_metric</code><span class="sig-paren">(</span><em>query</em>, <em>from_seconds_ago</em>, <em>to_seconds_ago</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datadog_hook.html#DatadogHook.query_metric"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datadog_hook.DatadogHook.query_metric" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Queries datadog for a specific metric, potentially with some |
| function applied to it and returns the results.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query</strong> (<em>string</em>) – The datadog query to execute (see datadog docs)</li> |
| <li><strong>from_seconds_ago</strong> (<em>int</em>) – How many seconds ago to start querying for.</li> |
| <li><strong>to_seconds_ago</strong> (<em>int</em>) – Up to how many seconds ago to query for.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datadog_hook.DatadogHook.send_metric"> |
| <code class="descname">send_metric</code><span class="sig-paren">(</span><em>metric_name</em>, <em>datapoint</em>, <em>tags=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datadog_hook.html#DatadogHook.send_metric"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datadog_hook.DatadogHook.send_metric" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sends a single datapoint metric to DataDog</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>metric_name</strong> (<em>string</em>) – The name of the metric</li> |
| <li><strong>datapoint</strong> (<em>integer</em><em> or </em><em>float</em>) – A single integer or float related to the metric</li> |
| <li><strong>tags</strong> (<em>list</em>) – A list of tags associated with the metric</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.datastore_hook.</code><code class="descname">DatastoreHook</code><span class="sig-paren">(</span><em>datastore_conn_id='google_cloud_datastore_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Interact with Google Cloud Datastore. This hook uses the Google Cloud Platform |
| connection.</p> |
| <p>This object is not thread safe. If you want to make multiple requests |
| simultaneously, you will need to create a hook per thread.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids"> |
| <code class="descname">allocate_ids</code><span class="sig-paren">(</span><em>partialKeys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.allocate_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Allocate IDs for incomplete keys. |
| See <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>partialKeys</strong> – a list of partial keys</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a list of full keys.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction"> |
| <code class="descname">begin_transaction</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.begin_transaction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a new transaction handle</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction</a></p> |
| </div> |
| </div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">a transaction handle</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.commit"> |
| <code class="descname">commit</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.commit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.commit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Commit a transaction, optionally creating, deleting or modifying some entities.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the commit request</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the response body of the commit request</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation"> |
| <code class="descname">delete_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.delete_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the long-running operation</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket"> |
| <code class="descname">export_to_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.export_to_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Export entities from Cloud Datastore to Cloud Storage for backup</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><em>version='v1'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Datastore service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation"> |
| <code class="descname">get_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the latest state of a long-running operation</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket"> |
| <code class="descname">import_from_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.import_from_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Import a backup from Cloud Storage to Cloud Datastore</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup"> |
| <code class="descname">lookup</code><span class="sig-paren">(</span><em>keys</em>, <em>read_consistency=None</em>, <em>transaction=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.lookup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lookup some entities by key</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>keys</strong> – the keys to lookup</li> |
| <li><strong>read_consistency</strong> – the read consistency to use. default, strong or eventual. |
| Cannot be used with a transaction.</li> |
| <li><strong>transaction</strong> – the transaction to use, if any.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">the response body of the lookup request.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done"> |
| <code class="descname">poll_operation_until_done</code><span class="sig-paren">(</span><em>name</em>, <em>polling_interval_in_seconds</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.poll_operation_until_done"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Poll backup operation state until it’s completed</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback"> |
| <code class="descname">rollback</code><span class="sig-paren">(</span><em>transaction</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.rollback"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Roll back a transaction</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>transaction</strong> – the transaction to roll back</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query"> |
| <code class="descname">run_query</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run a query for entities.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the query request</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the batch of query results.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.discord_webhook_hook.DiscordWebhookHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.discord_webhook_hook.</code><code class="descname">DiscordWebhookHook</code><span class="sig-paren">(</span><em>http_conn_id=None</em>, <em>webhook_endpoint=None</em>, <em>message=''</em>, <em>username=None</em>, <em>avatar_url=None</em>, <em>tts=False</em>, <em>proxy=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/discord_webhook_hook.html#DiscordWebhookHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.discord_webhook_hook.DiscordWebhookHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.http_hook.HttpHook" title="airflow.hooks.http_hook.HttpHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.http_hook.HttpHook</span></code></a></p> |
| <p>This hook allows you to post messages to Discord using incoming webhooks. |
| Takes a Discord connection ID with a default relative webhook endpoint. The |
| default endpoint can be overridden using the webhook_endpoint parameter |
| (<a class="reference external" href="https://discordapp.com/developers/docs/resources/webhook">https://discordapp.com/developers/docs/resources/webhook</a>).</p> |
| <p>Each Discord webhook can be pre-configured to use a specific username and |
| avatar_url. You can override these defaults in this hook.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>http_conn_id</strong> (<em>str</em>) – Http connection ID with host as “<a class="reference external" href="https://discord.com/api/">https://discord.com/api/</a>” and |
| default webhook endpoint in the extra field in the form of |
| {“webhook_endpoint”: “webhooks/{webhook.id}/{webhook.token}”}</li> |
| <li><strong>webhook_endpoint</strong> (<em>str</em>) – Discord webhook endpoint in the form of |
| “webhooks/{webhook.id}/{webhook.token}”</li> |
| <li><strong>message</strong> (<em>str</em>) – The message you want to send to your Discord channel |
| (max 2000 characters)</li> |
| <li><strong>username</strong> (<em>str</em>) – Override the default username of the webhook</li> |
| <li><strong>avatar_url</strong> (<em>str</em>) – Override the default avatar of the webhook</li> |
| <li><strong>tts</strong> (<em>bool</em>) – Is a text-to-speech message</li> |
| <li><strong>proxy</strong> (<em>str</em>) – Proxy to use to make the Discord webhook call</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.discord_webhook_hook.DiscordWebhookHook.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/discord_webhook_hook.html#DiscordWebhookHook.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.discord_webhook_hook.DiscordWebhookHook.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the Discord webhook call</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.emr_hook.EmrHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.emr_hook.</code><code class="descname">EmrHook</code><span class="sig-paren">(</span><em>emr_conn_id=None</em>, <em>region_name=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS EMR. emr_conn_id is only necessary for using the |
| create_job_flow method.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow"> |
| <code class="descname">create_job_flow</code><span class="sig-paren">(</span><em>job_flow_overrides</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook.create_job_flow"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a job flow using the config from the EMR connection. |
| Keys of the json extra hash may have the arguments of the boto3 |
| run_job_flow method. |
| Overrides for this config may be passed as the job_flow_overrides.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.fs_hook.FSHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.fs_hook.</code><code class="descname">FSHook</code><span class="sig-paren">(</span><em>conn_id='fs_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/fs_hook.html#FSHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.fs_hook.FSHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Allows for interaction with a file server.</p> |
| <p>Connection should have a name and a path specified under extra:</p> |
| <p>example: |
| Conn Id: fs_test |
| Conn Type: File (path) |
| Host, Schema, Login, Password, Port: empty |
| Extra: {“path”: “/tmp”}</p> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.ftp_hook.</code><code class="descname">FTPHook</code><span class="sig-paren">(</span><em>ftp_conn_id='ftp_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Interact with FTP.</p> |
| <p>Errors that may occur throughout should be handled |
| downstream.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.close_conn"> |
| <code class="descname">close_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.close_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.close_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Closes the connection. An error will occur if the |
| connection wasn’t ever opened.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.create_directory"> |
| <code class="descname">create_directory</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.create_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a directory on the remote system.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>str</em>) – full path to the remote directory to create</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.delete_directory"> |
| <code class="descname">delete_directory</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.delete_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.delete_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a directory on the remote system.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>str</em>) – full path to the remote directory to delete</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.delete_file"> |
| <code class="descname">delete_file</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.delete_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Removes a file on the FTP Server.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>str</em>) – full path to the remote file</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.describe_directory"> |
| <code class="descname">describe_directory</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.describe_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.describe_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a dictionary of {filename: {attributes}} for all files |
| on the remote system (where the MLSD command is supported).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>str</em>) – full path to the remote directory</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns an FTP connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.get_mod_time"> |
| <code class="descname">get_mod_time</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.get_mod_time"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.get_mod_time" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a datetime object representing the last time the file was modified</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>string</em>) – remote file path</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.get_size"> |
| <code class="descname">get_size</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.get_size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.get_size" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the size of a file (in bytes)</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>string</em>) – remote file path</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.list_directory"> |
| <code class="descname">list_directory</code><span class="sig-paren">(</span><em>path</em>, <em>nlst=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.list_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.list_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of files on the remote system.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>path</strong> (<em>str</em>) – full path to the remote directory to list</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.rename"> |
| <code class="descname">rename</code><span class="sig-paren">(</span><em>from_name</em>, <em>to_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.rename"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.rename" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Rename a file.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>from_name</strong> – rename file from name</li> |
| <li><strong>to_name</strong> – rename file to name</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.retrieve_file"> |
| <code class="descname">retrieve_file</code><span class="sig-paren">(</span><em>remote_full_path</em>, <em>local_full_path_or_buffer</em>, <em>callback=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.retrieve_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.retrieve_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transfers the remote file to a local location.</p> |
| <p>If local_full_path_or_buffer is a string path, the file will be put |
| at that location; if it is a file-like buffer, the file will |
| be written to the buffer but not closed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>remote_full_path</strong> (<em>str</em>) – full path to the remote file</li> |
| <li><strong>local_full_path_or_buffer</strong> (<em>str</em><em> or </em><em>file-like buffer</em>) – full path to the local file or a |
| file-like buffer</li> |
| <li><strong>callback</strong> (<em>callable</em>) – callback which is called each time a block of data |
| is read. if you do not use a callback, these blocks will be written |
| to the file or buffer passed in. if you do pass in a callback, note |
| that writing to a file or buffer will need to be handled inside the |
| callback. |
| [default: output_handle.write()]</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Example::</dt> |
| <dd><p class="first">hook = FTPHook(ftp_conn_id=’my_conn’)</p> |
| <p>remote_path = ‘/path/to/remote/file’ |
| local_path = ‘/path/to/local/file’</p> |
| <p># with a custom callback (in this case displaying progress on each read) |
| def print_progress(percent_progress):</p> |
| <blockquote> |
| <div>self.log.info(‘Percent Downloaded: %s%%’ % percent_progress)</div></blockquote> |
| <p>total_downloaded = 0 |
| total_file_size = hook.get_size(remote_path) |
| output_handle = open(local_path, ‘wb’) |
| def write_to_file_with_progress(data):</p> |
| <blockquote> |
| <div>total_downloaded += len(data) |
| output_handle.write(data) |
| percent_progress = (total_downloaded / total_file_size) * 100 |
| print_progress(percent_progress)</div></blockquote> |
| <p>hook.retrieve_file(remote_path, None, callback=write_to_file_with_progress)</p> |
| <p class="last"># without a custom callback data is written to the local_path |
| hook.retrieve_file(remote_path, local_path)</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPHook.store_file"> |
| <code class="descname">store_file</code><span class="sig-paren">(</span><em>remote_full_path</em>, <em>local_full_path_or_buffer</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPHook.store_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPHook.store_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transfers a local file to the remote location.</p> |
| <p>If local_full_path_or_buffer is a string path, the file will be read |
| from that location; if it is a file-like buffer, the file will |
| be read from the buffer but not closed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>remote_full_path</strong> (<em>str</em>) – full path to the remote file</li> |
| <li><strong>local_full_path_or_buffer</strong> (<em>str</em><em> or </em><em>file-like buffer</em>) – full path to the local file or a |
| file-like buffer</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPSHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.ftp_hook.</code><code class="descname">FTPSHook</code><span class="sig-paren">(</span><em>ftp_conn_id='ftp_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPSHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPSHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.ftp_hook.FTPHook" title="airflow.contrib.hooks.ftp_hook.FTPHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.ftp_hook.FTPHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ftp_hook.FTPSHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ftp_hook.html#FTPSHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ftp_hook.FTPSHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns an FTPS connection object.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_api_base_hook.</code><code class="descname">GoogleCloudBaseHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_api_base_hook.html#GoogleCloudBaseHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>A base hook for Google cloud-related hooks. Google cloud has a shared REST |
| API client that is built in the same way no matter which service you use. |
| This class helps construct and authorize the credentials needed to then |
| call googleapiclient.discovery.build() to actually discover and build a client |
| for a Google cloud service.</p> |
| <p>The class also contains some miscellaneous helper functions.</p> |
<p>All hooks derived from this base hook use the ‘Google Cloud Platform’ connection
| type. Three ways of authentication are supported:</p> |
| <p>Default credentials: Only the ‘Project Id’ is required. You’ll need to |
| have set up default credentials, such as by the |
<code class="docutils literal notranslate"><span class="pre">GOOGLE_APPLICATION_CREDENTIALS</span></code> environment variable or from the metadata
| server on Google Compute Engine.</p> |
| <p>JSON key file: Specify ‘Project Id’, ‘Keyfile Path’ and ‘Scope’.</p> |
| <p>Legacy P12 key files are not supported.</p> |
| <p>JSON data provided in the UI: Specify ‘Keyfile JSON’.</p> |
| <dl class="staticmethod"> |
| <dt id="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.fallback_to_default_project_id"> |
| <em class="property">static </em><code class="descname">fallback_to_default_project_id</code><span class="sig-paren">(</span><em>func</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_api_base_hook.html#GoogleCloudBaseHook.fallback_to_default_project_id"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.fallback_to_default_project_id" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Decorator that provides fallback for Google Cloud Platform project id. If |
| the project is None it will be replaced with the project_id from the |
| service account the Hook is authenticated with. Project id can be specified |
| either via project_id kwarg or via first parameter in positional args.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>func</strong> – function to wrap</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">result of the function call</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_dataflow_hook.</code><code class="descname">DataFlowHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Dataflow service object.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_dataproc_hook.</code><code class="descname">DataProcHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>api_version='v1beta2'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataproc_hook.html#DataProcHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for Google Cloud Dataproc APIs.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook.await"> |
| <code class="descname">await</code><span class="sig-paren">(</span><em>operation</em><span class="sig-paren">)</span><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook.await" title="Permalink to this definition">¶</a></dt> |
<dd><p>Waits for a Google Cloud Dataproc Operation to complete.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataproc_hook.html#DataProcHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Dataproc service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook.wait"> |
| <code class="descname">wait</code><span class="sig-paren">(</span><em>operation</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataproc_hook.html#DataProcHook.wait"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook.wait" title="Permalink to this definition">¶</a></dt> |
<dd><p>Waits for a Google Cloud Dataproc Operation to complete.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_mlengine_hook.</code><code class="descname">MLEngineHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job"> |
| <code class="descname">create_job</code><span class="sig-paren">(</span><em>project_id</em>, <em>job</em>, <em>use_existing_job_fn=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job" title="Permalink to this definition">¶</a></dt> |
<dd><p>Launches an MLEngine job and waits for it to reach a terminal state.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project id within which MLEngine |
| job will be launched.</li> |
| <li><strong>job</strong> (<em>dict</em>) – <p>MLEngine Job object that should be provided to the MLEngine |
| API, such as:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s1">'jobId'</span><span class="p">:</span> <span class="s1">'my_job_id'</span><span class="p">,</span> |
| <span class="s1">'trainingInput'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'scaleTier'</span><span class="p">:</span> <span class="s1">'STANDARD_1'</span><span class="p">,</span> |
| <span class="o">...</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
<li><strong>use_existing_job_fn</strong> (<em>function</em>) – In case an MLEngine job with the same
job_id already exists, this method (if provided) will decide whether
we should use the existing job, continue waiting for it to finish,
and return the job object. It should accept an MLEngine job
object and return a boolean value indicating whether it is OK to
reuse the existing job. If ‘use_existing_job_fn’ is not provided,
we by default reuse the existing MLEngine job.</li>
| </ul> |
| </td> |
| </tr> |
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The MLEngine job object if the job successfully reaches a
| terminal state (which might be FAILED or CANCELLED state).</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model"> |
| <code class="descname">create_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a Model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version"> |
| <code class="descname">create_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_spec</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates the Version on Google Cloud ML Engine.</p> |
| <p>Returns the operation if the version was created successfully and |
| raises an error otherwise.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version"> |
| <code class="descname">delete_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.delete_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the given version of a model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google MLEngine service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model"> |
| <code class="descname">get_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a Model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions"> |
| <code class="descname">list_versions</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.list_versions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists all available versions of a model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version"> |
| <code class="descname">set_default_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.set_default_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets a version to be the default. Blocks until finished.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_pubsub_hook.</code><code class="descname">PubSubHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for accessing Google Pub/Sub.</p> |
| <p>The GCP project against which actions are applied is determined by |
| the project embedded in the Connection referenced by gcp_conn_id.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.acknowledge"> |
| <code class="descname">acknowledge</code><span class="sig-paren">(</span><em>project</em>, <em>subscription</em>, <em>ack_ids</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.acknowledge"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.acknowledge" title="Permalink to this definition">¶</a></dt> |
<dd><p>Acknowledges the messages associated with the given <code class="docutils literal notranslate"><span class="pre">ack_ids</span></code> from a Pub/Sub subscription.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>project</strong> (<em>string</em>) – the GCP project name or ID in which
the subscription exists</li>
<li><strong>subscription</strong> (<em>string</em>) – the Pub/Sub subscription name whose messages are being
acknowledged; do not include the ‘projects/{project}/subscriptions/’ prefix.</li>
| <li><strong>ack_ids</strong> (<em>list</em>) – List of ReceivedMessage ackIds from a previous pull |
| response</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.create_subscription"> |
| <code class="descname">create_subscription</code><span class="sig-paren">(</span><em>topic_project</em>, <em>topic</em>, <em>subscription=None</em>, <em>subscription_project=None</em>, <em>ack_deadline_secs=10</em>, <em>fail_if_exists=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.create_subscription"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.create_subscription" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a Pub/Sub subscription, if it does not already exist.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>topic_project</strong> (<em>string</em>) – the GCP project ID of the topic that the |
| subscription will be bound to.</li> |
<li><strong>topic</strong> (<em>string</em>) – the Pub/Sub topic name that the subscription will be bound
to; do not include the <code class="docutils literal notranslate"><span class="pre">projects/{project}/topics/</span></code>
prefix.</li>
| <li><strong>subscription</strong> (<em>string</em>) – the Pub/Sub subscription name. If empty, a random |
| name will be generated using the uuid module</li> |
| <li><strong>subscription_project</strong> (<em>string</em>) – the GCP project ID where the subscription |
| will be created. If unspecified, <code class="docutils literal notranslate"><span class="pre">topic_project</span></code> will be used.</li> |
| <li><strong>ack_deadline_secs</strong> (<em>int</em>) – Number of seconds that a subscriber has to |
| acknowledge each message pulled from the subscription</li> |
<li><strong>fail_if_exists</strong> (<em>bool</em>) – if set, raise an exception if the subscription
already exists</li>
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">subscription name which will be the system-generated value if |
| the <code class="docutils literal notranslate"><span class="pre">subscription</span></code> parameter is not supplied</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">string</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.create_topic"> |
| <code class="descname">create_topic</code><span class="sig-paren">(</span><em>project</em>, <em>topic</em>, <em>fail_if_exists=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.create_topic"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.create_topic" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a Pub/Sub topic, if it does not already exist.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project</strong> (<em>string</em>) – the GCP project ID in which to create |
| the topic</li> |
| <li><strong>topic</strong> (<em>string</em>) – the Pub/Sub topic name to create; do not |
| include the <code class="docutils literal notranslate"><span class="pre">projects/{project}/topics/</span></code> prefix.</li> |
| <li><strong>fail_if_exists</strong> (<em>bool</em>) – if set, raise an exception if the topic |
| already exists</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.delete_subscription"> |
| <code class="descname">delete_subscription</code><span class="sig-paren">(</span><em>project</em>, <em>subscription</em>, <em>fail_if_not_exists=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.delete_subscription"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.delete_subscription" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a Pub/Sub subscription, if it exists.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project</strong> (<em>string</em>) – the GCP project ID where the subscription exists</li> |
| <li><strong>subscription</strong> (<em>string</em>) – the Pub/Sub subscription name to delete; do not |
| include the <code class="docutils literal notranslate"><span class="pre">projects/{project}/subscriptions/</span></code> prefix.</li> |
| <li><strong>fail_if_not_exists</strong> (<em>bool</em>) – if set, raise an exception if the topic |
| does not exist</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.delete_topic"> |
| <code class="descname">delete_topic</code><span class="sig-paren">(</span><em>project</em>, <em>topic</em>, <em>fail_if_not_exists=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.delete_topic"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.delete_topic" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a Pub/Sub topic if it exists.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project</strong> (<em>string</em>) – the GCP project ID in which to delete the topic</li> |
| <li><strong>topic</strong> (<em>string</em>) – the Pub/Sub topic name to delete; do not |
| include the <code class="docutils literal notranslate"><span class="pre">projects/{project}/topics/</span></code> prefix.</li> |
| <li><strong>fail_if_not_exists</strong> (<em>bool</em>) – if set, raise an exception if the topic |
| does not exist</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Pub/Sub service object.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">googleapiclient.discovery.Resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.publish"> |
| <code class="descname">publish</code><span class="sig-paren">(</span><em>project</em>, <em>topic</em>, <em>messages</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.publish"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.publish" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Publishes messages to a Pub/Sub topic.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project</strong> (<em>string</em>) – the GCP project ID in which to publish</li> |
| <li><strong>topic</strong> (<em>string</em>) – the Pub/Sub topic to which to publish; do not |
| include the <code class="docutils literal notranslate"><span class="pre">projects/{project}/topics/</span></code> prefix.</li> |
| <li><strong>messages</strong> (list of PubSub messages; see |
| <a class="reference external" href="http://cloud.google.com/pubsub/docs/reference/rest/v1/PubsubMessage">http://cloud.google.com/pubsub/docs/reference/rest/v1/PubsubMessage</a>) – messages to publish; if the data field in a |
| message is set, it should already be base64 encoded.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.pull"> |
| <code class="descname">pull</code><span class="sig-paren">(</span><em>project</em>, <em>subscription</em>, <em>max_messages</em>, <em>return_immediately=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_pubsub_hook.html#PubSubHook.pull"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_pubsub_hook.PubSubHook.pull" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Pulls up to <code class="docutils literal notranslate"><span class="pre">max_messages</span></code> messages from Pub/Sub subscription.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project</strong> (<em>string</em>) – the GCP project ID where the subscription exists</li> |
<li><strong>subscription</strong> (<em>string</em>) – the Pub/Sub subscription name to pull from; do not
include the ‘projects/{project}/subscriptions/’ prefix.</li>
| <li><strong>max_messages</strong> (<em>int</em>) – The maximum number of messages to return from |
| the Pub/Sub API.</li> |
| <li><strong>return_immediately</strong> (<em>bool</em>) – If set, the Pub/Sub API will immediately |
| return if no messages are available. Otherwise, the request will |
| block for an undisclosed, but bounded period of time</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
<dt>Returns: A list of Pub/Sub ReceivedMessage objects, each containing</dt>
<dd>an <code class="docutils literal notranslate"><span class="pre">ackId</span></code> property and a <code class="docutils literal notranslate"><span class="pre">message</span></code> property, which includes
the base64-encoded message content. See
<a class="reference external" href="https://cloud.google.com/pubsub/docs/reference/rest/v1/projects.subscriptions/pull#ReceivedMessage">https://cloud.google.com/pubsub/docs/reference/rest/v1/projects.subscriptions/pull#ReceivedMessage</a></dd>
| </dl> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcs_hook.</code><code class="descname">GoogleCloudStorageHook</code><span class="sig-paren">(</span><em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Interact with Google Cloud Storage. This hook uses the Google Cloud Platform |
| connection.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy"> |
| <code class="descname">copy</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.copy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Copies an object from a bucket to another, with renaming if requested.</p> |
| <p>destination_bucket or destination_object can be omitted, in which case |
| source bucket/object is used, but not both.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li> |
| <li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li> |
| <li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket to copy the object to. |
| Can be omitted; then the same bucket is used.</li> |
| <li><strong>destination_object</strong> (<em>string</em>) – The (renamed) path of the object if given. |
| Can be omitted; then the same name is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket"> |
| <code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new bucket. Google Cloud Storage uses a flat namespace, so |
| you can’t create a bucket with a name that is already in use.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information, see Bucket Naming Guidelines: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket.</li> |
| <li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored |
| and determines the SLA and the cost of storage. Values include</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li> |
| </ul> |
| <p>If this value is not specified when the bucket is |
| created, it will default to STANDARD.</p> |
| </li> |
| <li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. |
| Object data for objects in the bucket resides in physical storage |
| within this region. Defaults to US.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p> |
| </div> |
| </li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">If successful, it returns the <code class="docutils literal notranslate"><span class="pre">id</span></code> of the bucket.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete"> |
| <code class="descname">delete</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>generation=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.delete"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete an object if versioning is not enabled for the bucket, or if generation |
| parameter is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – name of the bucket, where the object resides</li> |
| <li><strong>object</strong> (<em>string</em>) – name of the object to delete</li> |
| <li><strong>generation</strong> (<em>string</em>) – if present, permanently delete the object of this generation</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if succeeded</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download"> |
| <code class="descname">download</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.download"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a file from Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to fetch from.</li> |
| <li><strong>object</strong> (<em>string</em>) – The object to fetch.</li> |
| <li><strong>filename</strong> (<em>string</em>) – If set, a local file path where the file should be written to.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists"> |
| <code class="descname">exists</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for the existence of a file in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Storage service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c"> |
| <code class="descname">get_crc32c</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_crc32c"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the CRC32c checksum of an object in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash"> |
| <code class="descname">get_md5hash</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_md5hash"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the MD5 hash of an object in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size"> |
| <code class="descname">get_size</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the size of a file in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl"> |
| <code class="descname">insert_bucket_acl</code><span class="sig-paren">(</span><em>bucket</em>, <em>entity</em>, <em>role</em>, <em>user_project</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.insert_bucket_acl"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new ACL entry on the specified bucket. |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert">https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li> |
| <li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms: |
| user-userId, user-email, group-groupId, group-email, domain-domain, |
| project-team-projectId, allUsers, allAuthenticatedUsers. |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/access-control/lists#scopes">https://cloud.google.com/storage/docs/access-control/lists#scopes</a></li> |
| <li><strong>role</strong> (<em>str</em>) – The access permission for the entity. |
| Acceptable values are: “OWNER”, “READER”, “WRITER”.</li> |
| <li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request. |
| Required for Requester Pays buckets.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl"> |
| <code class="descname">insert_object_acl</code><span class="sig-paren">(</span><em>bucket</em>, <em>object_name</em>, <em>entity</em>, <em>role</em>, <em>generation</em>, <em>user_project</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.insert_object_acl"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new ACL entry on the specified object. |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert">https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li> |
| <li><strong>object_name</strong> (<em>str</em>) – Name of the object. For information about how to URL encode |
| object names to be path safe, see: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/#encoding">https://cloud.google.com/storage/docs/json_api/#encoding</a></li> |
| <li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms: |
| user-userId, user-email, group-groupId, group-email, domain-domain, |
| project-team-projectId, allUsers, allAuthenticatedUsers |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/access-control/lists#scopes">https://cloud.google.com/storage/docs/access-control/lists#scopes</a></li> |
| <li><strong>role</strong> (<em>str</em>) – The access permission for the entity. |
| Acceptable values are: “OWNER”, “READER”.</li> |
| <li><strong>generation</strong> (<em>str</em>) – (Optional) If present, selects a specific revision of this |
| object (as opposed to the latest version, the default).</li> |
| <li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request. |
| Required for Requester Pays buckets.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after"> |
| <code class="descname">is_updated_after</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>ts</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.is_updated_after"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if an object in Google Cloud Storage was updated after the given timestamp.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| <li><strong>ts</strong> (<em>datetime</em>) – The timestamp to check against.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list"> |
| <code class="descname">list</code><span class="sig-paren">(</span><em>bucket</em>, <em>versions=None</em>, <em>maxResults=None</em>, <em>prefix=None</em>, <em>delimiter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list" title="Permalink to this definition">¶</a></dt> |
| <dd><p>List all objects from the bucket with the given string prefix in name.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – bucket name</li> |
| <li><strong>versions</strong> (<em>boolean</em>) – if true, list all versions of the objects</li> |
| <li><strong>maxResults</strong> (<em>integer</em>) – max count of items to return in a single page of responses</li> |
| <li><strong>prefix</strong> (<em>string</em>) – prefix string which filters objects whose name begin with |
| this prefix</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – filters objects based on the delimiter (e.g. ‘.csv’)</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">a stream of object names matching the filtering criteria</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite"> |
| <code class="descname">rewrite</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.rewrite"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Has the same functionality as copy, except that it will work on files |
| over 5 TB, as well as when copying between locations and/or storage |
| classes.</p> |
| <p>destination_object can be omitted, in which case source_object is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li> |
| <li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li> |
| <li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket to copy the object to.</li> |
| <li><strong>destination_object</strong> – The (renamed) path of the object if given. |
| Can be omitted; then the same name is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload"> |
| <code class="descname">upload</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename</em>, <em>mime_type='application/octet-stream'</em>, <em>gzip=False</em>, <em>multipart=False</em>, <em>num_retries=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads a local file to Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to.</li> |
| <li><strong>object</strong> (<em>string</em>) – The object name to set when uploading the local file.</li> |
| <li><strong>filename</strong> (<em>string</em>) – The local file path to the file to be uploaded.</li> |
| <li><strong>mime_type</strong> (<em>str</em>) – The MIME type to set when uploading the file.</li> |
| <li><strong>gzip</strong> (<em>bool</em>) – Option to compress file for upload</li> |
| <li><strong>multipart</strong> (<em>bool</em><em> or </em><em>int</em>) – If True, the upload will be split into multiple HTTP requests. The |
| default size is 256MiB per request. Pass a number instead of True to |
| specify the request size, which must be a multiple of 262144 (256KiB).</li> |
| <li><strong>num_retries</strong> (<em>int</em>) – The number of times to attempt to re-upload the file (or individual |
| chunks, in the case of multipart uploads). Retries are attempted |
| with exponential backoff.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_transfer_hook.</code><code class="descname">GCPTransferServiceHook</code><span class="sig-paren">(</span><em>api_version='v1'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_transfer_hook.html#GCPTransferServiceHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="integration.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for GCP Storage Transfer Service.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_transfer_hook.html#GCPTransferServiceHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves connection to Google Storage Transfer service.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Storage Transfer service object</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.imap_hook.ImapHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.imap_hook.</code><code class="descname">ImapHook</code><span class="sig-paren">(</span><em>imap_conn_id='imap_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/imap_hook.html#ImapHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.imap_hook.ImapHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>This hook connects to a mail server by using the imap protocol.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>imap_conn_id</strong> (<em>str</em>) – The connection id that contains the information |
| used to authenticate the client. |
| The default value is ‘imap_default’.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.imap_hook.ImapHook.download_mail_attachments"> |
| <code class="descname">download_mail_attachments</code><span class="sig-paren">(</span><em>name</em>, <em>local_output_directory</em>, <em>mail_folder='INBOX'</em>, <em>check_regex=False</em>, <em>latest_only=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/imap_hook.html#ImapHook.download_mail_attachments"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.imap_hook.ImapHook.download_mail_attachments" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Downloads attachments with the given name from the mail folder |
| to the local directory.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>name</strong> (<em>str</em>) – The name of the attachment that will be downloaded.</li> |
| <li><strong>local_output_directory</strong> (<em>str</em>) – The output directory on the local machine |
| where the files will be downloaded to.</li> |
| <li><strong>mail_folder</strong> (<em>str</em>) – The mail folder to look in. |
| The default value is ‘INBOX’.</li> |
| <li><strong>check_regex</strong> (<em>bool</em>) – Checks the name for a regular expression. |
| The default value is False.</li> |
| <li><strong>latest_only</strong> (<em>bool</em>) – If set to True it will only download |
| the first matched attachment. |
| The default value is False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.imap_hook.ImapHook.has_mail_attachment"> |
| <code class="descname">has_mail_attachment</code><span class="sig-paren">(</span><em>name</em>, <em>mail_folder='INBOX'</em>, <em>check_regex=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/imap_hook.html#ImapHook.has_mail_attachment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.imap_hook.ImapHook.has_mail_attachment" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks the mail folder for mails containing attachments with the given name.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – The name of the attachment that will be searched for.</li> |
| <li><strong>mail_folder</strong> (<em>str</em>) – The mail folder to look in. |
| The default value is ‘INBOX’.</li> |
| <li><strong>check_regex</strong> (<em>bool</em>) – Checks the name for a regular expression. |
| The default value is False.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if there is an attachment with the given name and False if not.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.imap_hook.ImapHook.retrieve_mail_attachments"> |
| <code class="descname">retrieve_mail_attachments</code><span class="sig-paren">(</span><em>name</em>, <em>mail_folder='INBOX'</em>, <em>check_regex=False</em>, <em>latest_only=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/imap_hook.html#ImapHook.retrieve_mail_attachments"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.imap_hook.ImapHook.retrieve_mail_attachments" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves attachments with the given name from the mail folder.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – The name of the attachment that will be downloaded.</li> |
| <li><strong>mail_folder</strong> (<em>str</em>) – The mail folder where to look at. |
| The default value is ‘INBOX’.</li> |
| <li><strong>check_regex</strong> (<em>bool</em>) – Checks the name for a regular expression. |
| The default value is False.</li> |
| <li><strong>latest_only</strong> (<em>bool</em>) – If set to True it will only retrieve |
| the first matched attachment. |
| The default value is False.</li> |
| </ul> |
| </td> |
| </tr> |
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">a list of tuples, each containing the attachment filename and its payload.</p>
| </td> |
| </tr> |
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">a list of tuples</p>
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.jira_hook.JiraHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.jira_hook.</code><code class="descname">JiraHook</code><span class="sig-paren">(</span><em>jira_conn_id='jira_default'</em>, <em>proxies=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/jira_hook.html#JiraHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.jira_hook.JiraHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Jira interaction hook, a Wrapper around JIRA Python SDK.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>jira_conn_id</strong> (<em>string</em>) – reference to a pre-defined Jira Connection</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.mongo_hook.</code><code class="descname">MongoHook</code><span class="sig-paren">(</span><em>conn_id='mongo_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>PyMongo Wrapper to Interact With Mongo Database |
| Mongo Connection Documentation |
| <a class="reference external" href="https://docs.mongodb.com/manual/reference/connection-string/index.html">https://docs.mongodb.com/manual/reference/connection-string/index.html</a> |
| You can specify connection string options in extra field of your connection |
| <a class="reference external" href="https://docs.mongodb.com/manual/reference/connection-string/index.html#connection-string-options">https://docs.mongodb.com/manual/reference/connection-string/index.html#connection-string-options</a> |
| ex.</p> |
| <blockquote> |
| <div>{replicaSet: test, ssl: True, connectTimeoutMS: 30000}</div></blockquote> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook.aggregate"> |
| <code class="descname">aggregate</code><span class="sig-paren">(</span><em>mongo_collection</em>, <em>aggregate_query</em>, <em>mongo_db=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook.aggregate"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook.aggregate" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Runs an aggregation pipeline and returns the results |
| <a class="reference external" href="https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.aggregate">https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.aggregate</a> |
| <a class="reference external" href="https://api.mongodb.com/python/current/examples/aggregation.html">https://api.mongodb.com/python/current/examples/aggregation.html</a></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook.find"> |
| <code class="descname">find</code><span class="sig-paren">(</span><em>mongo_collection</em>, <em>query</em>, <em>find_one=False</em>, <em>mongo_db=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook.find"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook.find" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Runs a mongo find query and returns the results |
| <a class="reference external" href="https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.find">https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.find</a></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook.get_collection"> |
| <code class="descname">get_collection</code><span class="sig-paren">(</span><em>mongo_collection</em>, <em>mongo_db=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook.get_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook.get_collection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fetches a mongo collection object for querying.</p> |
| <p>Uses connection schema as DB unless specified.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fetches PyMongo Client</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook.insert_many"> |
| <code class="descname">insert_many</code><span class="sig-paren">(</span><em>mongo_collection</em>, <em>docs</em>, <em>mongo_db=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook.insert_many"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook.insert_many" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Inserts many docs into a mongo collection. |
| <a class="reference external" href="https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.insert_many">https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.insert_many</a></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.mongo_hook.MongoHook.insert_one"> |
| <code class="descname">insert_one</code><span class="sig-paren">(</span><em>mongo_collection</em>, <em>doc</em>, <em>mongo_db=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/mongo_hook.html#MongoHook.insert_one"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.mongo_hook.MongoHook.insert_one" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Inserts a single document into a mongo collection |
| <a class="reference external" href="https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.insert_one">https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.insert_one</a></p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.openfaas_hook.OpenFaasHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.openfaas_hook.</code><code class="descname">OpenFaasHook</code><span class="sig-paren">(</span><em>function_name=None</em>, <em>conn_id='open_faas_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/openfaas_hook.html#OpenFaasHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.openfaas_hook.OpenFaasHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interact with Openfaas to query, deploy, invoke and update function</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>function_name</strong> – Name of the function, Defaults to None</li> |
<li><strong>conn_id</strong> (<em>str</em>) – openfaas connection to use, Defaults to open_faas_default
| for example host : <a class="reference external" href="http://openfaas.faas.com">http://openfaas.faas.com</a>, Conn Type : Http</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.pinot_hook.</code><code class="descname">PinotDbApiHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Connect to pinot db(<a class="reference external" href="https://github.com/linkedin/pinot">https://github.com/linkedin/pinot</a>) to issue pql</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_conn" title="Permalink to this definition">¶</a></dt> |
<dd><p>Establish a connection to the pinot broker through the pinot dbapi.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_first"> |
| <code class="descname">get_first</code><span class="sig-paren">(</span><em>sql</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.get_first"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_first" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns the first resulting row.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns a pandas dataframe</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>list</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_records"> |
| <code class="descname">get_records</code><span class="sig-paren">(</span><em>sql</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.get_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_records" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executes the sql and returns a set of records.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>sql</strong> (<em>str</em>) – the sql statement to be executed (str) or a list of |
| sql statements to execute</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_uri"> |
| <code class="descname">get_uri</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.get_uri"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.get_uri" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the connection uri for pinot broker.</p> |
| <p>e.g: <a class="reference external" href="http://localhost:9000/pql">http://localhost:9000/pql</a></p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A generic way to insert a set of tuples into a table, |
| a new transaction is created every commit_every rows</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – Name of the target table</li> |
| <li><strong>rows</strong> (<em>iterable of tuples</em>) – The rows to insert into the table</li> |
| <li><strong>target_fields</strong> (<em>iterable of strings</em>) – The names of the columns to fill in the table</li> |
| <li><strong>commit_every</strong> (<em>int</em>) – The maximum number of rows to insert in one |
| transaction. Set to 0 to insert all rows in one transaction.</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Whether to replace instead of insert</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.pinot_hook.PinotDbApiHook.set_autocommit"> |
| <code class="descname">set_autocommit</code><span class="sig-paren">(</span><em>conn</em>, <em>autocommit</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/pinot_hook.html#PinotDbApiHook.set_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.pinot_hook.PinotDbApiHook.set_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the autocommit flag on the connection</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.qubole_hook.QuboleHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.qubole_hook.</code><code class="descname">QuboleHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/qubole_hook.html#QuboleHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.qubole_hook.QuboleHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.qubole_hook.QuboleHook.get_jobs_id"> |
| <code class="descname">get_jobs_id</code><span class="sig-paren">(</span><em>ti</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/qubole_hook.html#QuboleHook.get_jobs_id"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.qubole_hook.QuboleHook.get_jobs_id" title="Permalink to this definition">¶</a></dt> |
<dd><p>Get jobs associated with a Qubole command
:param ti: Task Instance of the dag, used to determine the Qubole’s command id
:return: Job information associated with command</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.qubole_hook.QuboleHook.get_log"> |
| <code class="descname">get_log</code><span class="sig-paren">(</span><em>ti</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/qubole_hook.html#QuboleHook.get_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.qubole_hook.QuboleHook.get_log" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get Logs of a command from Qubole |
:param ti: Task Instance of the dag, used to determine the Qubole’s command id
| :return: command log as text</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.qubole_hook.QuboleHook.get_results"> |
| <code class="descname">get_results</code><span class="sig-paren">(</span><em>ti=None</em>, <em>fp=None</em>, <em>inline=True</em>, <em>delim=None</em>, <em>fetch=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/qubole_hook.html#QuboleHook.get_results"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.qubole_hook.QuboleHook.get_results" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get results (or just s3 locations) of a command from Qubole and save into a file |
:param ti: Task Instance of the dag, used to determine the Qubole’s command id
| :param fp: Optional file pointer, will create one and return if None passed |
| :param inline: True to download actual results, False to get s3 locations only |
| :param delim: Replaces the CTL-A chars with the given delim, defaults to ‘,’ |
| :param fetch: when inline is True, get results directly from s3 (if large) |
| :return: file location containing actual results or s3 locations of results</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.qubole_hook.QuboleHook.kill"> |
| <code class="descname">kill</code><span class="sig-paren">(</span><em>ti</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/qubole_hook.html#QuboleHook.kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.qubole_hook.QuboleHook.kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Kill (cancel) a Qubole command |
:param ti: Task Instance of the dag, used to determine the Qubole’s command id
| :return: response from Qubole</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.redis_hook.RedisHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.redis_hook.</code><code class="descname">RedisHook</code><span class="sig-paren">(</span><em>redis_conn_id='redis_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redis_hook.html#RedisHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redis_hook.RedisHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Hook to interact with Redis database</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redis_hook.RedisHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redis_hook.html#RedisHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redis_hook.RedisHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Redis connection.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redis_hook.RedisHook.key_exists"> |
| <code class="descname">key_exists</code><span class="sig-paren">(</span><em>key</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redis_hook.html#RedisHook.key_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redis_hook.RedisHook.key_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a key exists in Redis database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>key</strong> (<em>string</em>) – The key to check the existence.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.redshift_hook.</code><code class="descname">RedshiftHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS Redshift, using the boto3 library</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status"> |
| <code class="descname">cluster_status</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.cluster_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return status of a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot"> |
| <code class="descname">create_cluster_snapshot</code><span class="sig-paren">(</span><em>snapshot_identifier</em>, <em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.create_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a snapshot of a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster"> |
| <code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>skip_final_cluster_snapshot=True</em>, <em>final_cluster_snapshot_identifier=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a cluster and optionally create a snapshot</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| <li><strong>skip_final_cluster_snapshot</strong> (<em>bool</em>) – determines cluster snapshot creation</li> |
| <li><strong>final_cluster_snapshot_identifier</strong> (<em>str</em>) – name of final cluster snapshot</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots"> |
| <code class="descname">describe_cluster_snapshots</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.describe_cluster_snapshots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a list of snapshots for a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot"> |
| <code class="descname">restore_from_cluster_snapshot</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>snapshot_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.restore_from_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Restores a cluster from its snapshot</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| <li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sagemaker_hook.</code><code class="descname">SageMakerHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with Amazon SageMaker.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_s3_url"> |
| <code class="descname">check_s3_url</code><span class="sig-paren">(</span><em>s3url</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_s3_url"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_s3_url" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if an S3 URL exists</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>s3url</strong> (<em>str</em>) – S3 url</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">bool</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_status"> |
| <code class="descname">check_status</code><span class="sig-paren">(</span><em>job_name</em>, <em>key</em>, <em>describe_function</em>, <em>check_interval</em>, <em>max_ingestion_time</em>, <em>non_terminal_states=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check status of a SageMaker job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the job to check status</li> |
| <li><strong>key</strong> (<em>str</em>) – the key of the response dict |
| that points to the state</li> |
| <li><strong>describe_function</strong> (<em>python callable</em>) – the function used to retrieve the status</li> |
| <li><strong>args</strong> – the arguments for the function</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| <li><strong>non_terminal_states</strong> (<em>set</em>) – the set of non-terminal states</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">response of describe call after job is done</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_config"> |
| <code class="descname">check_training_config</code><span class="sig-paren">(</span><em>training_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a training configuration is valid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>training_config</strong> (<em>dict</em>) – the training configuration to validate</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_status_with_log"> |
| <code class="descname">check_training_status_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>non_terminal_states</em>, <em>failed_states</em>, <em>wait_for_completion</em>, <em>check_interval</em>, <em>max_ingestion_time</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_status_with_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_status_with_log" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Display the logs for a given training job, optionally tailing them until the |
| job is complete.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the training job to check status and display logs for</li> |
| <li><strong>non_terminal_states</strong> (<em>set</em>) – the set of non-terminal states</li> |
| <li><strong>failed_states</strong> (<em>set</em>) – the set of failed states</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether to keep looking for new log entries |
| until the job completes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – The interval in seconds between polling for new log entries and job completion</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_tuning_config"> |
| <code class="descname">check_tuning_config</code><span class="sig-paren">(</span><em>tuning_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_tuning_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_tuning_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a tuning configuration is valid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>tuning_config</strong> (<em>dict</em>) – the tuning configuration to validate</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.configure_s3_resources"> |
| <code class="descname">configure_s3_resources</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.configure_s3_resources"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.configure_s3_resources" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Extract the S3 operations from the configuration and execute them.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – config of SageMaker operation</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint"> |
| <code class="descname">create_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create an endpoint</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint_config"> |
| <code class="descname">create_endpoint_config</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create an endpoint config</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for endpoint-config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to endpoint config creation</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_model"> |
| <code class="descname">create_model</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a model job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for model</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to model creation</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_training_job"> |
| <code class="descname">create_training_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>print_log=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_training_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_training_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a training job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for training</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to training job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_transform_job"> |
| <code class="descname">create_transform_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_transform_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_transform_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a transform job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for transform job</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to transform job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_tuning_job"> |
| <code class="descname">create_tuning_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_tuning_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_tuning_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a tuning job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for tuning</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to tuning job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint"> |
| <code class="descname">describe_endpoint</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the endpoint info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the endpoint</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the endpoint info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint_config"> |
| <code class="descname">describe_endpoint_config</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the endpoint config info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the endpoint config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the endpoint config info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_model"> |
| <code class="descname">describe_model</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the SageMaker model info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the SageMaker model</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the model info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job"> |
| <code class="descname">describe_training_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the training job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the training job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the training job info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job_with_log"> |
| <code class="descname">describe_training_job_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>positions</em>, <em>stream_names</em>, <em>instance_count</em>, <em>state</em>, <em>last_description</em>, <em>last_describe_job_call</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job_with_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job_with_log" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the training job info associated with job_name and print CloudWatch logs</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_transform_job"> |
| <code class="descname">describe_transform_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_transform_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_transform_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the transform job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the transform job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the transform job info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_tuning_job"> |
| <code class="descname">describe_tuning_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_tuning_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_tuning_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the tuning job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the tuning job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the tuning job info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Establish an AWS connection for SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-class docutils literal notranslate"><span class="pre">SageMaker.Client</span></code></a></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_log_conn"> |
| <code class="descname">get_log_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_log_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_log_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Establish an AWS connection for retrieving logs during training</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><code class="xref py py-class docutils literal notranslate"><span class="pre">CloudWatchLog.Client</span></code></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.log_stream"> |
| <code class="descname">log_stream</code><span class="sig-paren">(</span><em>log_group</em>, <em>stream_name</em>, <em>start_time=0</em>, <em>skip=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.log_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.log_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A generator for log items in a single stream. This will yield all the |
| items that are available at the current moment.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li> |
| <li><strong>stream_name</strong> (<em>str</em>) – The name of the specific stream.</li> |
| <li><strong>start_time</strong> (<em>int</em>) – The time stamp value to start reading the logs from (default: 0).</li> |
| <li><strong>skip</strong> (<em>int</em>) – The number of log entries to skip at the start (default: 0). |
| This is for when there are multiple entries at the same timestamp.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">dict</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><div class="line-block"> |
| <div class="line">A CloudWatch log event with the following key-value pairs:</div> |
| <div class="line-block"> |
| <div class="line">'timestamp' (int): The time in milliseconds of the event.</div> |
| <div class="line">'message' (str): The log event data.</div> |
| <div class="line">'ingestionTime' (int): The time in milliseconds the event was ingested.</div> |
| </div> |
| </div> |
| </p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.multi_stream_iter"> |
| <code class="descname">multi_stream_iter</code><span class="sig-paren">(</span><em>log_group</em>, <em>streams</em>, <em>positions=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.multi_stream_iter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.multi_stream_iter" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Iterate over the available events coming from a set of log streams in a single log group |
| interleaving the events from each stream so they’re yielded in timestamp order.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li> |
| <li><strong>streams</strong> (<em>list</em>) – A list of the log stream names. The position of the stream in this list is |
| the stream number.</li> |
| <li><strong>positions</strong> (<em>list</em>) – A list of pairs of (timestamp, skip) which represents the last record |
| read from each stream.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A tuple of (stream number, cloudwatch log event).</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.tar_and_s3_upload"> |
| <code class="descname">tar_and_s3_upload</code><span class="sig-paren">(</span><em>path</em>, <em>key</em>, <em>bucket</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.tar_and_s3_upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.tar_and_s3_upload" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Tar the local file or directory and upload to s3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>path</strong> (<em>str</em>) – local file or directory</li> |
| <li><strong>key</strong> (<em>str</em>) – s3 key</li> |
| <li><strong>bucket</strong> (<em>str</em>) – s3 bucket</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.update_endpoint"> |
| <code class="descname">update_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.update_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.update_endpoint" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Update an endpoint</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint update</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.salesforce_hook.</code><code class="descname">SalesforceHook</code><span class="sig-paren">(</span><em>conn_id</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook.describe_object"> |
| <code class="descname">describe_object</code><span class="sig-paren">(</span><em>obj</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook.describe_object"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook.describe_object" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get the description of an object from Salesforce.</p> |
| <p>This description is the object’s schema |
| and some extra metadata that Salesforce stores for each object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>obj</strong> – Name of the Salesforce object |
| that we are getting a description of.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook.get_available_fields"> |
| <code class="descname">get_available_fields</code><span class="sig-paren">(</span><em>obj</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook.get_available_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook.get_available_fields" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a list of all available fields for an object.</p> |
| <p>This only returns the names of the fields.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook.get_object_from_salesforce"> |
| <code class="descname">get_object_from_salesforce</code><span class="sig-paren">(</span><em>obj</em>, <em>fields</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook.get_object_from_salesforce"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook.get_object_from_salesforce" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get all instances of the <cite>object</cite> from Salesforce. |
| For each model, only get the fields specified in fields.</p> |
| <dl class="docutils"> |
| <dt>All we really do underneath the hood is run:</dt> |
<dd>SELECT &lt;fields&gt; FROM &lt;obj&gt;;</dd>
| </dl> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook.make_query"> |
| <code class="descname">make_query</code><span class="sig-paren">(</span><em>query</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook.make_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook.make_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Make a query to Salesforce. Returns result in dictionary</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>query</strong> – The query to make to Salesforce</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook.sign_in"> |
| <code class="descname">sign_in</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook.sign_in"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook.sign_in" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sign into Salesforce.</p> |
| <p>If we have already signed it, this will just return the original object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.salesforce_hook.SalesforceHook.write_object_to_file"> |
| <code class="descname">write_object_to_file</code><span class="sig-paren">(</span><em>query_results</em>, <em>filename</em>, <em>fmt='csv'</em>, <em>coerce_to_timestamp=False</em>, <em>record_time_added=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/salesforce_hook.html#SalesforceHook.write_object_to_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.salesforce_hook.SalesforceHook.write_object_to_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Write query results to file.</p> |
| <dl class="docutils"> |
| <dt>Acceptable formats are:</dt> |
| <dd><ul class="first last simple"> |
| <li><dl class="first docutils"> |
| <dt>csv:</dt> |
| <dd>comma-separated-values file. This is the default format.</dd> |
| </dl> |
| </li> |
| <li><dl class="first docutils"> |
| <dt>json:</dt> |
| <dd>JSON array. Each element in the array is a different row.</dd> |
| </dl> |
| </li> |
| <li><dl class="first docutils"> |
| <dt>ndjson:</dt> |
| <dd>JSON array but each element is new-line delimited |
| instead of comma delimited like in <cite>json</cite></dd> |
| </dl> |
| </li> |
| </ul> |
| </dd> |
| </dl> |
| <p>This requires a significant amount of cleanup. |
| Pandas doesn’t handle output to CSV and json in a uniform way. |
| This is especially painful for datetime types. |
Pandas wants to write them as strings in CSV,
but as millisecond Unix timestamps in JSON.</p>
| <p>By default, this function will try and leave all values as |
| they are represented in Salesforce. |
| You use the <cite>coerce_to_timestamp</cite> flag to force all datetimes |
| to become Unix timestamps (UTC). |
This can be greatly beneficial as it will make all of your
| datetime fields look the same, |
| and makes it easier to work with in other database environments</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query_results</strong> – the results from a SQL query</li> |
| <li><strong>filename</strong> – the name of the file where the data |
| should be dumped to</li> |
| <li><strong>fmt</strong> – the format you want the output in. |
| <em>Default:</em> csv.</li> |
| <li><strong>coerce_to_timestamp</strong> – True if you want all datetime fields to be |
| converted into Unix timestamps. |
| False if you want them to be left in the |
| same format as they were in Salesforce. |
| Leaving the value as False will result |
| in datetimes being strings. |
| <em>Defaults to False</em></li> |
| <li><strong>record_time_added</strong> – <em>(optional)</em> True if you want to add a |
| Unix timestamp field to the resulting data |
| that marks when the data |
| was fetched from Salesforce. |
| <em>Default: False</em>.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sftp_hook.</code><code class="descname">SFTPHook</code><span class="sig-paren">(</span><em>ftp_conn_id='sftp_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.ssh_hook.SSHHook" title="airflow.contrib.hooks.ssh_hook.SSHHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.ssh_hook.SSHHook</span></code></a></p> |
| <p>This hook is inherited from SSH hook. Please refer to SSH hook for the input |
| arguments.</p> |
| <p>Interact with SFTP. Aims to be interchangeable with FTPHook.</p> |
| <dl class="docutils"> |
| <dt>Pitfalls: - In contrast with FTPHook describe_directory only returns size, type and</dt> |
| <dd><blockquote class="first"> |
| <div>modify. It doesn’t return unix.owner, unix.mode, perm, unix.group and |
| unique.</div></blockquote> |
| <ul class="last simple"> |
| <li>retrieve_file and store_file only take a local full path and not a |
| buffer.</li> |
| <li>If no mode is passed to create_directory it will be created with 777 |
| permissions.</li> |
| </ul> |
| </dd> |
| </dl> |
| <p>Errors that may occur throughout but should be handled downstream.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.close_conn"> |
| <code class="descname">close_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.close_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.close_conn" title="Permalink to this definition">¶</a></dt> |
<dd><p>Closes the connection. An error will occur if the
connection wasn’t ever opened.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.create_directory"> |
| <code class="descname">create_directory</code><span class="sig-paren">(</span><em>path</em>, <em>mode=777</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.create_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a directory on the remote system. |
| :param path: full path to the remote directory to create |
| :type path: str |
| :param mode: int representation of octal mode for directory</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.delete_directory"> |
| <code class="descname">delete_directory</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.delete_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.delete_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a directory on the remote system. |
| :param path: full path to the remote directory to delete |
| :type path: str</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.delete_file"> |
| <code class="descname">delete_file</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.delete_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Removes a file on the FTP Server |
| :param path: full path to the remote file |
| :type path: str</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.describe_directory"> |
| <code class="descname">describe_directory</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.describe_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.describe_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a dictionary of {filename: {attributes}} for all files |
| on the remote system (where the MLSD command is supported). |
| :param path: full path to the remote directory |
| :type path: str</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns an SFTP connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.list_directory"> |
| <code class="descname">list_directory</code><span class="sig-paren">(</span><em>path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.list_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.list_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a list of files on the remote system. |
| :param path: full path to the remote directory to list |
| :type path: str</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.retrieve_file"> |
| <code class="descname">retrieve_file</code><span class="sig-paren">(</span><em>remote_full_path</em>, <em>local_full_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.retrieve_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.retrieve_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transfers the remote file to a local location. |
| If local_full_path is a string path, the file will be put |
| at that location |
| :param remote_full_path: full path to the remote file |
| :type remote_full_path: str |
| :param local_full_path: full path to the local file |
| :type local_full_path: str</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sftp_hook.SFTPHook.store_file"> |
| <code class="descname">store_file</code><span class="sig-paren">(</span><em>remote_full_path</em>, <em>local_full_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sftp_hook.html#SFTPHook.store_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sftp_hook.SFTPHook.store_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Transfers a local file to the remote location. |
| If local_full_path_or_buffer is a string path, the file will be read |
| from that location |
| :param remote_full_path: full path to the remote file |
| :type remote_full_path: str |
| :param local_full_path: full path to the local file |
| :type local_full_path: str</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.slack_webhook_hook.SlackWebhookHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.slack_webhook_hook.</code><code class="descname">SlackWebhookHook</code><span class="sig-paren">(</span><em>http_conn_id=None</em>, <em>webhook_token=None</em>, <em>message=''</em>, <em>channel=None</em>, <em>username=None</em>, <em>icon_emoji=None</em>, <em>link_names=False</em>, <em>proxy=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/slack_webhook_hook.html#SlackWebhookHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.slack_webhook_hook.SlackWebhookHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.http_hook.HttpHook" title="airflow.hooks.http_hook.HttpHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.http_hook.HttpHook</span></code></a></p> |
| <p>This hook allows you to post messages to Slack using incoming webhooks. |
| Takes both Slack webhook token directly and connection that has Slack webhook token. |
| If both supplied, Slack webhook token will be used.</p> |
| <p>Each Slack webhook token can be pre-configured to use a specific channel, username and |
| icon. You can override these defaults in this hook.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>http_conn_id</strong> (<em>str</em>) – connection that has Slack webhook token in the extra field</li> |
| <li><strong>webhook_token</strong> (<em>str</em>) – Slack webhook token</li> |
| <li><strong>message</strong> (<em>str</em>) – The message you want to send on Slack</li> |
| <li><strong>channel</strong> (<em>str</em>) – The channel the message should be posted to</li> |
| <li><strong>username</strong> (<em>str</em>) – The username to post to slack with</li> |
| <li><strong>icon_emoji</strong> (<em>str</em>) – The emoji to use as icon for the user posting to Slack</li> |
| <li><strong>link_names</strong> (<em>bool</em>) – Whether or not to find and link channel and usernames in your |
| message</li> |
| <li><strong>proxy</strong> (<em>str</em>) – Proxy to use to make the Slack webhook call</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.slack_webhook_hook.SlackWebhookHook.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/slack_webhook_hook.html#SlackWebhookHook.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.slack_webhook_hook.SlackWebhookHook.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Remote Popen (actually execute the slack webhook call)</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cmd</strong> – command to remotely execute</li> |
| <li><strong>kwargs</strong> – extra arguments to Popen (see subprocess.Popen)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.snowflake_hook.SnowflakeHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.snowflake_hook.</code><code class="descname">SnowflakeHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/snowflake_hook.html#SnowflakeHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.snowflake_hook.SnowflakeHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with Snowflake.</p> |
| <p>get_sqlalchemy_engine() depends on snowflake-sqlalchemy</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.snowflake_hook.SnowflakeHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/snowflake_hook.html#SnowflakeHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.snowflake_hook.SnowflakeHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a snowflake.connection object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.snowflake_hook.SnowflakeHook.get_uri"> |
| <code class="descname">get_uri</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/snowflake_hook.html#SnowflakeHook.get_uri"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.snowflake_hook.SnowflakeHook.get_uri" title="Permalink to this definition">¶</a></dt> |
| <dd><p>override DbApiHook get_uri method for get_sqlalchemy_engine()</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.snowflake_hook.SnowflakeHook.set_autocommit"> |
| <code class="descname">set_autocommit</code><span class="sig-paren">(</span><em>conn</em>, <em>autocommit</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/snowflake_hook.html#SnowflakeHook.set_autocommit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.snowflake_hook.SnowflakeHook.set_autocommit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the autocommit flag on the connection</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.spark_jdbc_hook.SparkJDBCHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.spark_jdbc_hook.</code><code class="descname">SparkJDBCHook</code><span class="sig-paren">(</span><em>spark_app_name='airflow-spark-jdbc'</em>, <em>spark_conn_id='spark-default'</em>, <em>spark_conf=None</em>, <em>spark_py_files=None</em>, <em>spark_files=None</em>, <em>spark_jars=None</em>, <em>num_executors=None</em>, <em>executor_cores=None</em>, <em>executor_memory=None</em>, <em>driver_memory=None</em>, <em>verbose=False</em>, <em>principal=None</em>, <em>keytab=None</em>, <em>cmd_type='spark_to_jdbc'</em>, <em>jdbc_table=None</em>, <em>jdbc_conn_id='jdbc-default'</em>, <em>jdbc_driver=None</em>, <em>metastore_table=None</em>, <em>jdbc_truncate=False</em>, <em>save_mode=None</em>, <em>save_format=None</em>, <em>batch_size=None</em>, <em>fetch_size=None</em>, <em>num_partitions=None</em>, <em>partition_column=None</em>, <em>lower_bound=None</em>, <em>upper_bound=None</em>, <em>create_table_column_types=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/spark_jdbc_hook.html#SparkJDBCHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.spark_jdbc_hook.SparkJDBCHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook" title="airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook</span></code></a></p> |
| <p>This hook extends the SparkSubmitHook specifically for performing data |
| transfers to/from JDBC-based databases with Apache Spark.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>spark_app_name</strong> (<em>str</em>) – Name of the job (default airflow-spark-jdbc)</li> |
| <li><strong>spark_conn_id</strong> (<em>str</em>) – Connection id as configured in Airflow administration</li> |
| <li><strong>spark_conf</strong> (<em>dict</em>) – Any additional Spark configuration properties</li> |
| <li><strong>spark_py_files</strong> (<em>str</em>) – Additional python files used (.zip, .egg, or .py)</li> |
| <li><strong>spark_files</strong> (<em>str</em>) – Additional files to upload to the container running the job</li> |
| <li><strong>spark_jars</strong> (<em>str</em>) – Additional jars to upload and add to the driver and |
| executor classpath</li> |
| <li><strong>num_executors</strong> (<em>int</em>) – number of executor to run. This should be set so as to manage |
| the number of connections made with the JDBC database</li> |
| <li><strong>executor_cores</strong> (<em>int</em>) – Number of cores per executor</li> |
| <li><strong>executor_memory</strong> (<em>str</em>) – Memory per executor (e.g. 1000M, 2G)</li> |
| <li><strong>driver_memory</strong> (<em>str</em>) – Memory allocated to the driver (e.g. 1000M, 2G)</li> |
| <li><strong>verbose</strong> (<em>bool</em>) – Whether to pass the verbose flag to spark-submit for debugging</li> |
| <li><strong>keytab</strong> (<em>str</em>) – Full path to the file that contains the keytab</li> |
| <li><strong>principal</strong> (<em>str</em>) – The name of the kerberos principal used for keytab</li> |
| <li><strong>cmd_type</strong> (<em>str</em>) – Which way the data should flow. 2 possible values: |
| spark_to_jdbc: data written by spark from metastore to jdbc |
| jdbc_to_spark: data written by spark from jdbc to metastore</li> |
| <li><strong>jdbc_table</strong> (<em>str</em>) – The name of the JDBC table</li> |
| <li><strong>jdbc_conn_id</strong> – Connection id used for connection to JDBC database</li> |
| <li><strong>jdbc_driver</strong> (<em>str</em>) – Name of the JDBC driver to use for the JDBC connection. This |
driver (usually a jar) should be passed in the ‘jars’ parameter</li>
<li><strong>metastore_table</strong> (<em>str</em>) – The name of the metastore table.</li>
| <li><strong>jdbc_truncate</strong> (<em>bool</em>) – (spark_to_jdbc only) Whether or not Spark should truncate or |
| drop and recreate the JDBC table. This only takes effect if |
| ‘save_mode’ is set to Overwrite. Also, if the schema is |
| different, Spark cannot truncate, and will drop and recreate</li> |
| <li><strong>save_mode</strong> (<em>str</em>) – The Spark save-mode to use (e.g. overwrite, append, etc.)</li> |
| <li><strong>save_format</strong> (<em>str</em>) – (jdbc_to_spark-only) The Spark save-format to use (e.g. parquet)</li> |
| <li><strong>batch_size</strong> (<em>int</em>) – (spark_to_jdbc only) The size of the batch to insert per round |
| trip to the JDBC database. Defaults to 1000</li> |
| <li><strong>fetch_size</strong> (<em>int</em>) – (jdbc_to_spark only) The size of the batch to fetch per round trip |
| from the JDBC database. Default depends on the JDBC driver</li> |
| <li><strong>num_partitions</strong> (<em>int</em>) – The maximum number of partitions that can be used by Spark |
| simultaneously, both for spark_to_jdbc and jdbc_to_spark |
| operations. This will also cap the number of JDBC connections |
| that can be opened</li> |
| <li><strong>partition_column</strong> (<em>str</em>) – (jdbc_to_spark-only) A numeric column to be used to |
| partition the metastore table by. If specified, you must |
| also specify: |
| num_partitions, lower_bound, upper_bound</li> |
| <li><strong>lower_bound</strong> (<em>int</em>) – (jdbc_to_spark-only) Lower bound of the range of the numeric |
| partition column to fetch. If specified, you must also specify: |
| num_partitions, partition_column, upper_bound</li> |
| <li><strong>upper_bound</strong> (<em>int</em>) – (jdbc_to_spark-only) Upper bound of the range of the numeric |
| partition column to fetch. If specified, you must also specify: |
| num_partitions, partition_column, lower_bound</li> |
| <li><strong>create_table_column_types</strong> – (spark_to_jdbc-only) The database column data types |
| to use instead of the defaults, when creating the |
| table. Data type information should be specified in |
| the same format as CREATE TABLE columns syntax |
| (e.g: “name CHAR(64), comments VARCHAR(1024)”). |
| The specified types should be valid spark sql data |
| types.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">jdbc_conn_id: str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.spark_sql_hook.SparkSqlHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.spark_sql_hook.</code><code class="descname">SparkSqlHook</code><span class="sig-paren">(</span><em>sql</em>, <em>conf=None</em>, <em>conn_id='spark_sql_default'</em>, <em>total_executor_cores=None</em>, <em>executor_cores=None</em>, <em>executor_memory=None</em>, <em>keytab=None</em>, <em>principal=None</em>, <em>master='yarn'</em>, <em>name='default-name'</em>, <em>num_executors=None</em>, <em>verbose=True</em>, <em>yarn_queue='default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/spark_sql_hook.html#SparkSqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.spark_sql_hook.SparkSqlHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>This hook is a wrapper around the spark-sql binary. It requires that the |
| “spark-sql” binary is in the PATH. |
| :param sql: The SQL query to execute |
| :type sql: str |
| :param conf: arbitrary Spark configuration property |
| :type conf: str (format: PROP=VALUE) |
| :param conn_id: connection_id string |
| :type conn_id: str |
| :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors</p> |
| <blockquote> |
| <div>(Default: all the available cores on the worker)</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>executor_cores</strong> (<em>int</em>) – (Standalone & YARN only) Number of cores per |
| executor (Default: 2)</li> |
| <li><strong>executor_memory</strong> (<em>str</em>) – Memory per executor (e.g. 1000M, 2G) (Default: 1G)</li> |
| <li><strong>keytab</strong> (<em>str</em>) – Full path to the file that contains the keytab</li> |
| <li><strong>master</strong> (<em>str</em>) – spark://host:port, mesos://host:port, yarn, or local</li> |
| <li><strong>name</strong> (<em>str</em>) – Name of the job.</li> |
| <li><strong>num_executors</strong> (<em>int</em>) – Number of executors to launch</li> |
| <li><strong>verbose</strong> (<em>bool</em>) – Whether to pass the verbose flag to spark-sql</li> |
| <li><strong>yarn_queue</strong> (<em>str</em>) – The YARN queue to submit to (Default: “default”)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.spark_sql_hook.SparkSqlHook.run_query"> |
| <code class="descname">run_query</code><span class="sig-paren">(</span><em>cmd=''</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/spark_sql_hook.html#SparkSqlHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.spark_sql_hook.SparkSqlHook.run_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Remote Popen (actually execute the Spark-sql query)</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cmd</strong> – command to remotely execute</li> |
| <li><strong>kwargs</strong> – extra arguments to Popen (see subprocess.Popen)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.spark_submit_hook.</code><code class="descname">SparkSubmitHook</code><span class="sig-paren">(</span><em>conf=None</em>, <em>conn_id='spark_default'</em>, <em>files=None</em>, <em>py_files=None</em>, <em>driver_classpath=None</em>, <em>jars=None</em>, <em>java_class=None</em>, <em>packages=None</em>, <em>exclude_packages=None</em>, <em>repositories=None</em>, <em>total_executor_cores=None</em>, <em>executor_cores=None</em>, <em>executor_memory=None</em>, <em>driver_memory=None</em>, <em>keytab=None</em>, <em>principal=None</em>, <em>name='default-name'</em>, <em>num_executors=None</em>, <em>application_args=None</em>, <em>env_vars=None</em>, <em>verbose=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/spark_submit_hook.html#SparkSubmitHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>This hook is a wrapper around the spark-submit binary to kick off a spark-submit job. |
| It requires that the “spark-submit” binary is in the PATH or the spark_home to be |
| supplied. |
| :param conf: Arbitrary Spark configuration properties |
| :type conf: dict |
| :param conn_id: The connection id as configured in Airflow administration. When an</p> |
| <blockquote> |
| <div>invalid connection_id is supplied, it will default to yarn.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>files</strong> (<em>str</em>) – Upload additional files to the executor running the job, separated by a |
| comma. Files will be placed in the working directory of each executor. |
| For example, serialized objects.</li> |
| <li><strong>py_files</strong> (<em>str</em>) – Additional python files used by the job, can be .zip, .egg or .py.</li> |
| <li><strong>driver_classpath</strong> (<em>str</em>) – Additional, driver-specific, classpath settings.</li> |
| <li><strong>jars</strong> (<em>str</em>) – Submit additional jars to upload and place them in executor classpath.</li> |
| <li><strong>java_class</strong> (<em>str</em>) – the main class of the Java application</li> |
| <li><strong>packages</strong> – Comma-separated list of maven coordinates of jars to include on the</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>driver and executor classpaths |
| :type packages: str |
| :param exclude_packages: Comma-separated list of maven coordinates of jars to exclude |
| while resolving the dependencies provided in ‘packages’ |
| :type exclude_packages: str |
| :param repositories: Comma-separated list of additional remote repositories to search |
| for the maven coordinates given with ‘packages’ |
| :type repositories: str |
| :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors |
| (Default: all the available cores on the worker) |
| :type total_executor_cores: int |
| :param executor_cores: (Standalone, YARN and Kubernetes only) Number of cores per |
| executor (Default: 2) |
| :type executor_cores: int |
| :param executor_memory: Memory per executor (e.g. 1000M, 2G) (Default: 1G) |
| :type executor_memory: str |
| :param driver_memory: Memory allocated to the driver (e.g. 1000M, 2G) (Default: 1G) |
| :type driver_memory: str |
| :param keytab: Full path to the file that contains the keytab |
| :type keytab: str |
| :param principal: The name of the kerberos principal used for keytab |
| :type principal: str |
| :param name: Name of the job (default airflow-spark) |
| :type name: str |
| :param num_executors: Number of executors to launch |
| :type num_executors: int |
| :param application_args: Arguments for the application being submitted |
| :type application_args: list |
| :param env_vars: Environment variables for spark-submit. It</p> |
| <blockquote> |
| <div>supports yarn and k8s mode too.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>verbose</strong> (<em>bool</em>) – Whether to pass the verbose flag to spark-submit process for debugging</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook.submit"> |
| <code class="descname">submit</code><span class="sig-paren">(</span><em>application=''</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/spark_submit_hook.html#SparkSubmitHook.submit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook.submit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Remote Popen to execute the spark-submit job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>application</strong> (<em>str</em>) – Submitted application, jar or py file</li> |
| <li><strong>kwargs</strong> – extra arguments to Popen (see subprocess.Popen)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.sqoop_hook.SqoopHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sqoop_hook.</code><code class="descname">SqoopHook</code><span class="sig-paren">(</span><em>conn_id='sqoop_default'</em>, <em>verbose=False</em>, <em>num_mappers=None</em>, <em>hcatalog_database=None</em>, <em>hcatalog_table=None</em>, <em>properties=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sqoop_hook.html#SqoopHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sqoop_hook.SqoopHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>This hook is a wrapper around the sqoop 1 binary. To be able to use the hook |
| it is required that “sqoop” is in the PATH.</p> |
| <p>Additional arguments that can be passed via the ‘extra’ JSON field of the |
| sqoop connection: |
| * job_tracker: Job tracker local|jobtracker:port. |
| * namenode: Namenode. |
| * lib_jars: Comma separated jar files to include in the classpath. |
| * files: Comma separated files to be copied to the map reduce cluster. |
| * archives: Comma separated archives to be unarchived on the compute</p> |
| <blockquote> |
| <div>machines.</div></blockquote> |
| <ul class="simple"> |
| <li>password_file: Path to file containing the password.</li> |
| </ul> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>conn_id</strong> (<em>str</em>) – Reference to the sqoop connection.</li> |
| <li><strong>verbose</strong> (<em>bool</em>) – Set sqoop to verbose.</li> |
| <li><strong>num_mappers</strong> (<em>int</em>) – Number of map tasks to import in parallel.</li> |
| <li><strong>properties</strong> (<em>dict</em>) – Properties to set via the -D argument</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sqoop_hook.SqoopHook.Popen"> |
| <code class="descname">Popen</code><span class="sig-paren">(</span><em>cmd</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sqoop_hook.html#SqoopHook.Popen"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sqoop_hook.SqoopHook.Popen" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Remote Popen</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>cmd</strong> – command to remotely execute</li> |
| <li><strong>kwargs</strong> – extra arguments to Popen (see subprocess.Popen)</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">handle to subprocess</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sqoop_hook.SqoopHook.export_table"> |
| <code class="descname">export_table</code><span class="sig-paren">(</span><em>table</em>, <em>export_dir</em>, <em>input_null_string</em>, <em>input_null_non_string</em>, <em>staging_table</em>, <em>clear_staging_table</em>, <em>enclosed_by</em>, <em>escaped_by</em>, <em>input_fields_terminated_by</em>, <em>input_lines_terminated_by</em>, <em>input_optionally_enclosed_by</em>, <em>batch</em>, <em>relaxed_isolation</em>, <em>extra_export_options=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sqoop_hook.html#SqoopHook.export_table"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sqoop_hook.SqoopHook.export_table" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Exports Hive table to remote location. Arguments are copies of direct |
sqoop command line arguments.
| :param table: Table remote destination |
| :param export_dir: Hive table to export |
| :param input_null_string: The string to be interpreted as null for</p> |
| <blockquote> |
| <div>string columns</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>input_null_non_string</strong> – The string to be interpreted as null |
| for non-string columns</li> |
| <li><strong>staging_table</strong> – The table in which data will be staged before |
| being inserted into the destination table</li> |
| <li><strong>clear_staging_table</strong> – Indicate that any data present in the |
| staging table can be deleted</li> |
| <li><strong>enclosed_by</strong> – Sets a required field enclosing character</li> |
| <li><strong>escaped_by</strong> – Sets the escape character</li> |
| <li><strong>input_fields_terminated_by</strong> – Sets the field separator character</li> |
| <li><strong>input_lines_terminated_by</strong> – Sets the end-of-line character</li> |
| <li><strong>input_optionally_enclosed_by</strong> – Sets a field enclosing character</li> |
| <li><strong>batch</strong> – Use batch mode for underlying statement execution</li> |
| <li><strong>relaxed_isolation</strong> – Transaction isolation to read uncommitted |
| for the mappers</li> |
| <li><strong>extra_export_options</strong> – Extra export options to pass as dict. |
| If a key doesn’t have a value, just pass an empty string to it. |
Don’t include the <code>--</code> prefix for sqoop options.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sqoop_hook.SqoopHook.import_query"> |
| <code class="descname">import_query</code><span class="sig-paren">(</span><em>query</em>, <em>target_dir</em>, <em>append=False</em>, <em>file_type='text'</em>, <em>split_by=None</em>, <em>direct=None</em>, <em>driver=None</em>, <em>extra_import_options=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sqoop_hook.html#SqoopHook.import_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sqoop_hook.SqoopHook.import_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Imports a specific query from the rdbms to hdfs |
| :param query: Free format query to run |
| :param target_dir: HDFS destination dir |
| :param append: Append data to an existing dataset in HDFS |
| :param file_type: “avro”, “sequence”, “text” or “parquet”</p> |
| <blockquote> |
| <div>Imports data to hdfs into the specified format. Defaults to text.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>split_by</strong> – Column of the table used to split work units</li> |
| <li><strong>direct</strong> – Use direct import fast path</li> |
| <li><strong>driver</strong> – Manually specify JDBC driver class to use</li> |
| <li><strong>extra_import_options</strong> – Extra import options to pass as dict. |
| If a key doesn’t have a value, just pass an empty string to it. |
Don’t include the <code>--</code> prefix for sqoop options.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sqoop_hook.SqoopHook.import_table"> |
| <code class="descname">import_table</code><span class="sig-paren">(</span><em>table</em>, <em>target_dir=None</em>, <em>append=False</em>, <em>file_type='text'</em>, <em>columns=None</em>, <em>split_by=None</em>, <em>where=None</em>, <em>direct=False</em>, <em>driver=None</em>, <em>extra_import_options=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sqoop_hook.html#SqoopHook.import_table"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sqoop_hook.SqoopHook.import_table" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Imports table from remote location to target dir. Arguments are |
| copies of direct sqoop command line arguments |
| :param table: Table to read |
| :param target_dir: HDFS destination dir |
| :param append: Append data to an existing dataset in HDFS |
| :param file_type: “avro”, “sequence”, “text” or “parquet”.</p> |
| <blockquote> |
| <div>Imports data to into the specified format. Defaults to text.</div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>columns</strong> – &lt;col,col,col…&gt; Columns to import from table</li>
| <li><strong>split_by</strong> – Column of the table used to split work units</li> |
| <li><strong>where</strong> – WHERE clause to use during import</li> |
| <li><strong>direct</strong> – Use direct connector if exists for the database</li> |
| <li><strong>driver</strong> – Manually specify JDBC driver class to use</li> |
| <li><strong>extra_import_options</strong> – Extra import options to pass as dict. |
| If a key doesn’t have a value, just pass an empty string to it. |
Don’t include the <code>--</code> prefix for sqoop options.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.ssh_hook.SSHHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.ssh_hook.</code><code class="descname">SSHHook</code><span class="sig-paren">(</span><em>ssh_conn_id=None</em>, <em>remote_host=None</em>, <em>username=None</em>, <em>password=None</em>, <em>key_file=None</em>, <em>port=None</em>, <em>timeout=10</em>, <em>keepalive_interval=30</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ssh_hook.html#SSHHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ssh_hook.SSHHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Hook for ssh remote execution using Paramiko. |
| ref: <a class="reference external" href="https://github.com/paramiko/paramiko">https://github.com/paramiko/paramiko</a> |
| This hook also lets you create ssh tunnel and serve as basis for SFTP file transfer</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>ssh_conn_id</strong> (<em>str</em>) – connection id from airflow Connections from where all the required |
| parameters can be fetched like username, password or key_file. |
Though the priority is given to the param passed during init</li>
| <li><strong>remote_host</strong> (<em>str</em>) – remote host to connect</li> |
| <li><strong>username</strong> (<em>str</em>) – username to connect to the remote_host</li> |
| <li><strong>password</strong> (<em>str</em>) – password of the username to connect to the remote_host</li> |
| <li><strong>key_file</strong> (<em>str</em>) – key file to use to connect to the remote_host.</li> |
| <li><strong>port</strong> (<em>int</em>) – port of remote host to connect (Default is paramiko SSH_PORT)</li> |
| <li><strong>timeout</strong> (<em>int</em>) – timeout for the attempt to connect to the remote_host.</li> |
| <li><strong>keepalive_interval</strong> (<em>int</em>) – send a keepalive packet to remote host every |
| keepalive_interval seconds</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ssh_hook.SSHHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ssh_hook.html#SSHHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ssh_hook.SSHHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Opens a ssh connection to the remote host.</p> |
| <p>:return paramiko.SSHClient object</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.ssh_hook.SSHHook.get_tunnel"> |
| <code class="descname">get_tunnel</code><span class="sig-paren">(</span><em>remote_port</em>, <em>remote_host='localhost'</em>, <em>local_port=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/ssh_hook.html#SSHHook.get_tunnel"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.ssh_hook.SSHHook.get_tunnel" title="Permalink to this definition">¶</a></dt> |
<dd><p>Creates a tunnel between two hosts. Like ssh -L &lt;LOCAL_PORT&gt;:host:&lt;REMOTE_PORT&gt;.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>remote_port</strong> (<em>int</em>) – The remote port to create a tunnel to</li> |
| <li><strong>remote_host</strong> (<em>str</em>) – The remote host to create a tunnel to (default localhost)</li> |
| <li><strong>local_port</strong> (<em>int</em>) – The local port to attach the tunnel to</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">sshtunnel.SSHTunnelForwarder object</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.vertica_hook.VerticaHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.vertica_hook.</code><code class="descname">VerticaHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/vertica_hook.html#VerticaHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.vertica_hook.VerticaHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a></p> |
| <p>Interact with Vertica.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.vertica_hook.VerticaHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/vertica_hook.html#VerticaHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.vertica_hook.VerticaHook.get_conn" title="Permalink to this definition">¶</a></dt> |
<dd><p>Returns a Vertica connection object</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.wasb_hook.</code><code class="descname">WasbHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure Blob Storage through the wasb:// protocol.</p> |
| <p>Additional options passed in the ‘extra’ field of the connection will be |
passed to the <cite>BlockBlobService()</cite> constructor. For example, authenticate
| using a SAS token by adding {“sas_token”: “YOUR_TOKEN”}.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob"> |
| <code class="descname">check_for_blob</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_blob"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a blob exists on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the blob exists, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix"> |
| <code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a prefix exists on Azure Blob storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.list_blobs()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if blobs matching the prefix exist, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.delete_file"> |
| <code class="descname">delete_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>is_prefix=False</em>, <em>ignore_if_missing=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.delete_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a file from Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>is_prefix</strong> (<em>bool</em>) – If blob_name is a prefix, delete all matching files</li>
<li><strong>ignore_if_missing</strong> (<em>bool</em>) – if True, then return success even if the
blob does not exist.</li>
<li><strong>kwargs</strong> – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the BlockBlobService object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_file"> |
| <code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to download.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a string to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – String to load.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.read_file"> |
| <code class="descname">read_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.read_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.read_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Read a file from Azure Blob Storage and return as a string.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.winrm_hook.WinRMHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.winrm_hook.</code><code class="descname">WinRMHook</code><span class="sig-paren">(</span><em>ssh_conn_id=None</em>, <em>remote_host=None</em>, <em>username=None</em>, <em>password=None</em>, <em>key_file=None</em>, <em>timeout=10</em>, <em>keepalive_interval=30</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/winrm_hook.html#WinRMHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.winrm_hook.WinRMHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Hook for winrm remote execution using pywinrm.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>ssh_conn_id</strong> (<em>str</em>) – connection id from airflow Connections from where all |
| the required parameters can be fetched like username, password or key_file. |
Though the priority is given to the param passed during init</li>
| <li><strong>remote_host</strong> (<em>str</em>) – remote host to connect</li> |
| <li><strong>username</strong> (<em>str</em>) – username to connect to the remote_host</li> |
| <li><strong>password</strong> (<em>str</em>) – password of the username to connect to the remote_host</li> |
| <li><strong>key_file</strong> (<em>str</em>) – key file to use to connect to the remote_host.</li> |
| <li><strong>timeout</strong> (<em>int</em>) – timeout for the attempt to connect to the remote_host.</li> |
| <li><strong>keepalive_interval</strong> (<em>int</em>) – send a keepalive packet to remote host |
| every keepalive_interval seconds</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="executors"> |
| <h2>Executors<a class="headerlink" href="#executors" title="Permalink to this headline">¶</a></h2> |
| <p>Executors are the mechanism by which task instances get run.</p> |
| <dl class="class"> |
| <dt id="airflow.executors.local_executor.LocalExecutor"> |
| <em class="property">class </em><code class="descclassname">airflow.executors.local_executor.</code><code class="descname">LocalExecutor</code><span class="sig-paren">(</span><em>parallelism=32</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/local_executor.html#LocalExecutor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.local_executor.LocalExecutor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.executors.base_executor.BaseExecutor</span></code></p> |
| <p>LocalExecutor executes tasks locally in parallel. It uses the |
| multiprocessing Python library and queues to parallelize the execution |
| of tasks.</p> |
| <dl class="method"> |
| <dt id="airflow.executors.local_executor.LocalExecutor.end"> |
| <code class="descname">end</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/local_executor.html#LocalExecutor.end"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.local_executor.LocalExecutor.end" title="Permalink to this definition">¶</a></dt> |
<dd><p>This method is called when the caller is done submitting jobs and
wants to wait synchronously for the jobs submitted previously to be
all done.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.local_executor.LocalExecutor.execute_async"> |
| <code class="descname">execute_async</code><span class="sig-paren">(</span><em>key</em>, <em>command</em>, <em>queue=None</em>, <em>executor_config=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/local_executor.html#LocalExecutor.execute_async"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.local_executor.LocalExecutor.execute_async" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This method will execute the command asynchronously.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.local_executor.LocalExecutor.start"> |
| <code class="descname">start</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/local_executor.html#LocalExecutor.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.local_executor.LocalExecutor.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executors may need to get things started. For example LocalExecutor |
| starts N workers.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.local_executor.LocalExecutor.sync"> |
| <code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/local_executor.html#LocalExecutor.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.local_executor.LocalExecutor.sync" title="Permalink to this definition">¶</a></dt> |
<dd><p>Sync will get called periodically by the heartbeat method.
Executors should override this to gather statuses.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.executors.celery_executor.CeleryExecutor"> |
| <em class="property">class </em><code class="descclassname">airflow.executors.celery_executor.</code><code class="descname">CeleryExecutor</code><span class="sig-paren">(</span><em>parallelism=32</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/celery_executor.html#CeleryExecutor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.celery_executor.CeleryExecutor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.executors.base_executor.BaseExecutor</span></code></p> |
| <p>CeleryExecutor is recommended for production use of Airflow. It allows |
| distributing the execution of task instances to multiple worker nodes.</p> |
| <p>Celery is a simple, flexible and reliable distributed system to process |
| vast amounts of messages, while providing operations with the tools |
| required to maintain such a system.</p> |
| <dl class="method"> |
| <dt id="airflow.executors.celery_executor.CeleryExecutor.end"> |
| <code class="descname">end</code><span class="sig-paren">(</span><em>synchronous=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/celery_executor.html#CeleryExecutor.end"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.celery_executor.CeleryExecutor.end" title="Permalink to this definition">¶</a></dt> |
<dd><p>This method is called when the caller is done submitting jobs and
wants to wait synchronously for the jobs submitted previously to be
all done.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.celery_executor.CeleryExecutor.execute_async"> |
| <code class="descname">execute_async</code><span class="sig-paren">(</span><em>key</em>, <em>command</em>, <em>queue='default'</em>, <em>executor_config=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/celery_executor.html#CeleryExecutor.execute_async"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.celery_executor.CeleryExecutor.execute_async" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This method will execute the command asynchronously.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.celery_executor.CeleryExecutor.start"> |
| <code class="descname">start</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/celery_executor.html#CeleryExecutor.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.celery_executor.CeleryExecutor.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executors may need to get things started. For example LocalExecutor |
| starts N workers.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.celery_executor.CeleryExecutor.sync"> |
| <code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/celery_executor.html#CeleryExecutor.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.celery_executor.CeleryExecutor.sync" title="Permalink to this definition">¶</a></dt> |
<dd><p>Sync will get called periodically by the heartbeat method.
Executors should override this to gather statuses.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.executors.sequential_executor.SequentialExecutor"> |
| <em class="property">class </em><code class="descclassname">airflow.executors.sequential_executor.</code><code class="descname">SequentialExecutor</code><a class="reference internal" href="_modules/airflow/executors/sequential_executor.html#SequentialExecutor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.sequential_executor.SequentialExecutor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.executors.base_executor.BaseExecutor</span></code></p> |
| <p>This executor will only run one task instance at a time, can be used |
| for debugging. It is also the only executor that can be used with sqlite |
| since sqlite doesn’t support multiple connections.</p> |
| <p>Since we want airflow to work out of the box, it defaults to this |
| SequentialExecutor alongside sqlite as you first install it.</p> |
| <dl class="method"> |
| <dt id="airflow.executors.sequential_executor.SequentialExecutor.end"> |
| <code class="descname">end</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/sequential_executor.html#SequentialExecutor.end"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.sequential_executor.SequentialExecutor.end" title="Permalink to this definition">¶</a></dt> |
<dd><p>This method is called when the caller is done submitting jobs and
wants to wait synchronously for the jobs submitted previously to be
all done.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.sequential_executor.SequentialExecutor.execute_async"> |
| <code class="descname">execute_async</code><span class="sig-paren">(</span><em>key</em>, <em>command</em>, <em>queue=None</em>, <em>executor_config=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/sequential_executor.html#SequentialExecutor.execute_async"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.sequential_executor.SequentialExecutor.execute_async" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This method will execute the command asynchronously.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.executors.sequential_executor.SequentialExecutor.sync"> |
| <code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/executors/sequential_executor.html#SequentialExecutor.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.executors.sequential_executor.SequentialExecutor.sync" title="Permalink to this definition">¶</a></dt> |
<dd><p>Sync will get called periodically by the heartbeat method.
Executors should override this to gather statuses.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <div class="section" id="community-contributed-executors"> |
| <h3>Community-contributed executors<a class="headerlink" href="#community-contributed-executors" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.executors.mesos_executor.MesosExecutor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.executors.mesos_executor.</code><code class="descname">MesosExecutor</code><span class="sig-paren">(</span><em>parallelism=32</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/mesos_executor.html#MesosExecutor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.mesos_executor.MesosExecutor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.executors.base_executor.BaseExecutor</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.www.utils.LoginMixin</span></code></p> |
| <p>MesosExecutor allows distributing the execution of task |
| instances to multiple mesos workers.</p> |
| <p>Apache Mesos is a distributed systems kernel which abstracts |
| CPU, memory, storage, and other compute resources away from |
| machines (physical or virtual), enabling fault-tolerant and |
| elastic distributed systems to easily be built and run effectively. |
| See <a class="reference external" href="http://mesos.apache.org/">http://mesos.apache.org/</a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.mesos_executor.MesosExecutor.end"> |
| <code class="descname">end</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/mesos_executor.html#MesosExecutor.end"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.mesos_executor.MesosExecutor.end" title="Permalink to this definition">¶</a></dt> |
<dd><p>This method is called when the caller is done submitting jobs and
wants to wait synchronously for the jobs submitted previously to be
all done.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.mesos_executor.MesosExecutor.execute_async"> |
| <code class="descname">execute_async</code><span class="sig-paren">(</span><em>key</em>, <em>command</em>, <em>queue=None</em>, <em>executor_config=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/mesos_executor.html#MesosExecutor.execute_async"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.mesos_executor.MesosExecutor.execute_async" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This method will execute the command asynchronously.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.mesos_executor.MesosExecutor.start"> |
| <code class="descname">start</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/mesos_executor.html#MesosExecutor.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.mesos_executor.MesosExecutor.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executors may need to get things started. For example LocalExecutor |
| starts N workers.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.mesos_executor.MesosExecutor.sync"> |
| <code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/mesos_executor.html#MesosExecutor.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.mesos_executor.MesosExecutor.sync" title="Permalink to this definition">¶</a></dt> |
<dd><p>Sync will get called periodically by the heartbeat method.
Executors should override this to gather statuses.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.executors.kubernetes_executor.KubernetesExecutor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.executors.kubernetes_executor.</code><code class="descname">KubernetesExecutor</code><a class="reference internal" href="_modules/airflow/contrib/executors/kubernetes_executor.html#KubernetesExecutor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.kubernetes_executor.KubernetesExecutor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.executors.base_executor.BaseExecutor</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.clear_not_launched_queued_tasks"> |
| <code class="descname">clear_not_launched_queued_tasks</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/kubernetes_executor.html#KubernetesExecutor.clear_not_launched_queued_tasks"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.clear_not_launched_queued_tasks" title="Permalink to this definition">¶</a></dt> |
<dd><p>If the airflow scheduler restarts with pending “Queued” tasks, the tasks may or
may not have been launched. Thus, on starting up the scheduler let’s check every
“Queued” task to see if it has been launched (i.e. if there is a corresponding pod
on kubernetes).</p>
<p>If it has been launched then do nothing, otherwise reset the state to “None” so
the task will be rescheduled.</p>
<p>This will not be necessary in a future version of airflow in which there is
proper support for State.LAUNCHED.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.end"> |
| <code class="descname">end</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/kubernetes_executor.html#KubernetesExecutor.end"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.end" title="Permalink to this definition">¶</a></dt> |
<dd><p>This method is called when the caller is done submitting jobs and
wants to wait synchronously for the jobs submitted previously to be
all done.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.execute_async"> |
| <code class="descname">execute_async</code><span class="sig-paren">(</span><em>key</em>, <em>command</em>, <em>queue=None</em>, <em>executor_config=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/kubernetes_executor.html#KubernetesExecutor.execute_async"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.execute_async" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This method will execute the command asynchronously.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.start"> |
| <code class="descname">start</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/kubernetes_executor.html#KubernetesExecutor.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Executors may need to get things started. For example LocalExecutor |
| starts N workers.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.sync"> |
| <code class="descname">sync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/executors/kubernetes_executor.html#KubernetesExecutor.sync"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.executors.kubernetes_executor.KubernetesExecutor.sync" title="Permalink to this definition">¶</a></dt> |
<dd><p>Sync will get called periodically by the heartbeat method.
Executors should override this to gather statuses.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| |
| <a href="faq.html" class="btn btn-neutral" title="FAQ" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| |
| </p> |
| </div> |
| Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script> |
| <script type="text/javascript" src="_static/jquery.js"></script> |
| <script type="text/javascript" src="_static/underscore.js"></script> |
| <script type="text/javascript" src="_static/doctools.js"></script> |
| <script type="text/javascript" src="_static/language_data.js"></script> |
| |
| |
| |
| |
| <script type="text/javascript" src="_static/js/theme.js"></script> |
| |
<script type="text/javascript">
      // Enable the Read the Docs theme's sticky/collapsible sidebar navigation
      // once the DOM is ready (jQuery(fn) is shorthand for jQuery(document).ready(fn)).
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>
| |
| </body> |
| </html> |