blob: e613291a7260c3888909508ddd935c4221f3f0e7 [file] [log] [blame]
<!--
JavaScript that renders AIRFLOW-XXX and PR references in text
as HTML links.
Overrides extrahead block from sphinx_rtd_theme
https://www.sphinx-doc.org/en/master/templating.html
-->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tutorial &mdash; Airflow Documentation</title>
<link rel="shortcut icon" href="_static/pin_32.png"/>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script type="text/javascript" src="_static/jira-links.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/graphviz.css" type="text/css" />
<link rel="stylesheet" href="_static/exampleinclude.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="How-to Guides" href="howto/index.html" />
<link rel="prev" title="Installation" href="installation.html" />
<script>
</script>
<style>
</style>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Airflow
</a>
<div class="version">
1.10.8
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Tutorial</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#example-pipeline-definition">Example Pipeline definition</a></li>
<li class="toctree-l2"><a class="reference internal" href="#it-s-a-dag-definition-file">It’s a DAG definition file</a></li>
<li class="toctree-l2"><a class="reference internal" href="#importing-modules">Importing Modules</a></li>
<li class="toctree-l2"><a class="reference internal" href="#default-arguments">Default Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="#instantiate-a-dag">Instantiate a DAG</a></li>
<li class="toctree-l2"><a class="reference internal" href="#tasks">Tasks</a></li>
<li class="toctree-l2"><a class="reference internal" href="#templating-with-jinja">Templating with Jinja</a></li>
<li class="toctree-l2"><a class="reference internal" href="#adding-dag-and-tasks-documentation">Adding DAG and Tasks documentation</a></li>
<li class="toctree-l2"><a class="reference internal" href="#id1">Adding DAG and Tasks documentation</a></li>
<li class="toctree-l2"><a class="reference internal" href="#setting-up-dependencies">Setting up Dependencies</a></li>
<li class="toctree-l2"><a class="reference internal" href="#recap">Recap</a></li>
<li class="toctree-l2"><a class="reference internal" href="#testing">Testing</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#running-the-script">Running the Script</a></li>
<li class="toctree-l3"><a class="reference internal" href="#command-line-metadata-validation">Command Line Metadata Validation</a></li>
<li class="toctree-l3"><a class="reference internal" href="#id3">Testing</a></li>
<li class="toctree-l3"><a class="reference internal" href="#backfill">Backfill</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#what-s-next">What’s Next?</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="executor/index.html">Executor</a></li>
<li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="api.html">REST API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="integration.html">Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="errors.html">Error Tracking</a></li>
<li class="toctree-l1"><a class="reference internal" href="kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="dag-serialization.html">DAG Serialization</a></li>
<li class="toctree-l1"><a class="reference internal" href="changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="best-practices.html">Best Practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="macros.html">Macros reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="privacy_notice.html">Privacy Notice</a></li>
</ul>
<p class="caption"><span class="caption-text">References</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="_api/index.html">Python API</a></li>
<li class="toctree-l1"><a class="reference internal" href="configurations-ref.html">Configurations</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li>Tutorial</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/tutorial.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<blockquote>
<div></div></blockquote>
<div class="section" id="tutorial">
<h1>Tutorial<a class="headerlink" href="#tutorial" title="Permalink to this headline"></a></h1>
<p>This tutorial walks you through some of the fundamental Airflow concepts,
objects, and their usage while writing your first pipeline.</p>
<div class="section" id="example-pipeline-definition">
<h2>Example Pipeline definition<a class="headerlink" href="#example-pipeline-definition" title="Permalink to this headline"></a></h2>
<p>Here is an example of a basic pipeline definition. Do not worry if this looks
complicated; a line-by-line explanation follows below.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">timedelta</span>
<span class="c1"># The DAG object; we&#39;ll need this to instantiate a DAG</span>
<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">DAG</span>
<span class="c1"># Operators; we need this to operate!</span>
<span class="kn">from</span> <span class="nn">airflow.operators.bash_operator</span> <span class="kn">import</span> <span class="n">BashOperator</span>
<span class="kn">from</span> <span class="nn">airflow.utils.dates</span> <span class="kn">import</span> <span class="n">days_ago</span>
<span class="c1"># These args will get passed on to each operator</span>
<span class="c1"># You can override them on a per-task basis during operator initialization</span>
<span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;owner&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">,</span>
<span class="s1">&#39;depends_on_past&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;start_date&#39;</span><span class="p">:</span> <span class="n">days_ago</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span>
<span class="s1">&#39;email&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;airflow@example.com&#39;</span><span class="p">],</span>
<span class="s1">&#39;email_on_failure&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;email_on_retry&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;retries&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;retry_delay&#39;</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">5</span><span class="p">),</span>
<span class="c1"># &#39;queue&#39;: &#39;bash_queue&#39;,</span>
<span class="c1"># &#39;pool&#39;: &#39;backfill&#39;,</span>
<span class="c1"># &#39;priority_weight&#39;: 10,</span>
<span class="c1"># &#39;end_date&#39;: datetime(2016, 1, 1),</span>
<span class="c1"># &#39;wait_for_downstream&#39;: False,</span>
<span class="c1"># &#39;dag&#39;: dag,</span>
<span class="c1"># &#39;sla&#39;: timedelta(hours=2),</span>
<span class="c1"># &#39;execution_timeout&#39;: timedelta(seconds=300),</span>
<span class="c1"># &#39;on_failure_callback&#39;: some_function,</span>
<span class="c1"># &#39;on_success_callback&#39;: some_other_function,</span>
<span class="c1"># &#39;on_retry_callback&#39;: another_function,</span>
<span class="c1"># &#39;sla_miss_callback&#39;: yet_another_function,</span>
<span class="c1"># &#39;trigger_rule&#39;: &#39;all_success&#39;</span>
<span class="p">}</span>
<span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span>
<span class="s1">&#39;tutorial&#39;</span><span class="p">,</span>
<span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span><span class="s1">&#39;A simple tutorial DAG&#39;</span><span class="p">,</span>
<span class="n">schedule_interval</span><span class="o">=</span><span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
<span class="p">)</span>
<span class="c1"># t1, t2 and t3 are examples of tasks created by instantiating operators</span>
<span class="n">t1</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;print_date&#39;</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="s1">&#39;date&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">t2</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;sleep&#39;</span><span class="p">,</span>
<span class="n">depends_on_past</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="s1">&#39;sleep 5&#39;</span><span class="p">,</span>
<span class="n">retries</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">dag</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="vm">__doc__</span>
<span class="n">t1</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span><span class="se">\</span>
<span class="s2">#### Task Documentation</span>
<span class="s2">You can document your task using the attributes `doc_md` (markdown),</span>
<span class="s2">`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets</span>
<span class="s2">rendered in the UI&#39;s Task Instance Details page.</span>
<span class="s2">![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import</span><span class="si">%20s</span><span class="s2">oul.png)</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">templated_command</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">{</span><span class="si">% f</span><span class="s2">or i in range(5) %}</span>
<span class="s2"> echo &quot;{{ ds }}&quot;</span>
<span class="s2"> echo &quot;{{ macros.ds_add(ds, 7)}}&quot;</span>
<span class="s2"> echo &quot;{{ params.my_param }}&quot;</span>
<span class="s2">{</span><span class="si">% e</span><span class="s2">ndfor %}</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">t3</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;templated&#39;</span><span class="p">,</span>
<span class="n">depends_on_past</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="n">templated_command</span><span class="p">,</span>
<span class="n">params</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;my_param&#39;</span><span class="p">:</span> <span class="s1">&#39;Parameter I passed in&#39;</span><span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">t1</span> <span class="o">&gt;&gt;</span> <span class="p">[</span><span class="n">t2</span><span class="p">,</span> <span class="n">t3</span><span class="p">]</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="it-s-a-dag-definition-file">
<h2>It’s a DAG definition file<a class="headerlink" href="#it-s-a-dag-definition-file" title="Permalink to this headline"></a></h2>
<p>One thing to wrap your head around (it may not be very intuitive for everyone
at first) is that this Airflow Python script is really
just a configuration file specifying the DAG’s structure as code.
The actual tasks defined here will run in a different context from
the context of this script. Different tasks run on different workers
at different points in time, which means that this script cannot be used
to cross-communicate between tasks. For that
purpose, Airflow provides a more advanced feature called <code class="docutils literal notranslate"><span class="pre">XCom</span></code>.</p>
<p>People sometimes think of the DAG definition file as a place where they
can do some actual data processing - that is not the case at all!
The script’s purpose is to define a DAG object. It needs to evaluate
quickly (seconds, not minutes) since the scheduler will execute it
periodically to reflect any changes.</p>
</div>
<div class="section" id="importing-modules">
<h2>Importing Modules<a class="headerlink" href="#importing-modules" title="Permalink to this headline"></a></h2>
<p>An Airflow pipeline is just a Python script that happens to define an
Airflow DAG object. Let’s start by importing the libraries we will need.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># The DAG object; we&#39;ll need this to instantiate a DAG</span>
<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">DAG</span>
<span class="c1"># Operators; we need this to operate!</span>
<span class="kn">from</span> <span class="nn">airflow.operators.bash_operator</span> <span class="kn">import</span> <span class="n">BashOperator</span>
<span class="kn">from</span> <span class="nn">airflow.utils.dates</span> <span class="kn">import</span> <span class="n">days_ago</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="default-arguments">
<h2>Default Arguments<a class="headerlink" href="#default-arguments" title="Permalink to this headline"></a></h2>
<p>We’re about to create a DAG and some tasks, and we have the choice to
explicitly pass a set of arguments to each task’s constructor
(which would become redundant), or (better!) we can define a dictionary
of default parameters that we can use when creating tasks.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># These args will get passed on to each operator</span>
<span class="c1"># You can override them on a per-task basis during operator initialization</span>
<span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;owner&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">,</span>
<span class="s1">&#39;depends_on_past&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;start_date&#39;</span><span class="p">:</span> <span class="n">days_ago</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span>
<span class="s1">&#39;email&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;airflow@example.com&#39;</span><span class="p">],</span>
<span class="s1">&#39;email_on_failure&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;email_on_retry&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;retries&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;retry_delay&#39;</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">5</span><span class="p">),</span>
<span class="c1"># &#39;queue&#39;: &#39;bash_queue&#39;,</span>
<span class="c1"># &#39;pool&#39;: &#39;backfill&#39;,</span>
<span class="c1"># &#39;priority_weight&#39;: 10,</span>
<span class="c1"># &#39;end_date&#39;: datetime(2016, 1, 1),</span>
<span class="c1"># &#39;wait_for_downstream&#39;: False,</span>
<span class="c1"># &#39;dag&#39;: dag,</span>
<span class="c1"># &#39;sla&#39;: timedelta(hours=2),</span>
<span class="c1"># &#39;execution_timeout&#39;: timedelta(seconds=300),</span>
<span class="c1"># &#39;on_failure_callback&#39;: some_function,</span>
<span class="c1"># &#39;on_success_callback&#39;: some_other_function,</span>
<span class="c1"># &#39;on_retry_callback&#39;: another_function,</span>
<span class="c1"># &#39;sla_miss_callback&#39;: yet_another_function,</span>
<span class="c1"># &#39;trigger_rule&#39;: &#39;all_success&#39;</span>
<span class="p">}</span>
</pre></div>
</div>
</div>
<p>For more information about the BaseOperator’s parameters and what they do,
refer to the <a class="reference internal" href="_api/airflow/models/index.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a> documentation.</p>
<p>Also, note that you could easily define different sets of arguments that
would serve different purposes. An example of that would be to have
different settings between a production and development environment.</p>
</div>
<div class="section" id="instantiate-a-dag">
<h2>Instantiate a DAG<a class="headerlink" href="#instantiate-a-dag" title="Permalink to this headline"></a></h2>
<p>We’ll need a DAG object to nest our tasks into. Here we pass a string
that defines the <code class="docutils literal notranslate"><span class="pre">dag_id</span></code>, which serves as a unique identifier for your DAG.
We also pass the default argument dictionary that we just defined and
define a <code class="docutils literal notranslate"><span class="pre">schedule_interval</span></code> of 1 day for the DAG.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span>
<span class="s1">&#39;tutorial&#39;</span><span class="p">,</span>
<span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span><span class="s1">&#39;A simple tutorial DAG&#39;</span><span class="p">,</span>
<span class="n">schedule_interval</span><span class="o">=</span><span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
<span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="tasks">
<h2>Tasks<a class="headerlink" href="#tasks" title="Permalink to this headline"></a></h2>
<p>Tasks are generated when instantiating operator objects. An object
instantiated from an operator is called a task. The first argument
<code class="docutils literal notranslate"><span class="pre">task_id</span></code> acts as a unique identifier for the task.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;print_date&#39;</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="s1">&#39;date&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">t2</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;sleep&#39;</span><span class="p">,</span>
<span class="n">depends_on_past</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="s1">&#39;sleep 5&#39;</span><span class="p">,</span>
<span class="n">retries</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
</pre></div>
</div>
</div>
<p>Notice how we pass a mix of operator-specific arguments (<code class="docutils literal notranslate"><span class="pre">bash_command</span></code>) and
an argument common to all operators (<code class="docutils literal notranslate"><span class="pre">retries</span></code>) inherited
from BaseOperator to the operator’s constructor. This is simpler than
passing every argument for every constructor call. Also, notice that in
the second task we override the <code class="docutils literal notranslate"><span class="pre">retries</span></code> parameter with <code class="docutils literal notranslate"><span class="pre">3</span></code>.</p>
<p>The precedence rules for a task are as follows:</p>
<ol class="arabic simple">
<li><p>Explicitly passed arguments</p></li>
<li><p>Values that exist in the <code class="docutils literal notranslate"><span class="pre">default_args</span></code> dictionary</p></li>
<li><p>The operator’s default value, if one exists</p></li>
</ol>
<p>A task must include or inherit the arguments <code class="docutils literal notranslate"><span class="pre">task_id</span></code> and <code class="docutils literal notranslate"><span class="pre">owner</span></code>,
otherwise Airflow will raise an exception.</p>
</div>
<div class="section" id="templating-with-jinja">
<h2>Templating with Jinja<a class="headerlink" href="#templating-with-jinja" title="Permalink to this headline"></a></h2>
<p>Airflow leverages the power of
<a class="reference external" href="http://jinja.pocoo.org/docs/dev/">Jinja Templating</a> and provides
the pipeline author
with a set of built-in parameters and macros. Airflow also provides
hooks for the pipeline author to define their own parameters, macros and
templates.</p>
<p>This tutorial barely scratches the surface of what you can do with
templating in Airflow, but the goal of this section is to let you know
this feature exists, get you familiar with double curly brackets, and
point to the most common template variable: <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code> (today’s “date
stamp”).</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">templated_command</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">{</span><span class="si">% f</span><span class="s2">or i in range(5) %}</span>
<span class="s2"> echo &quot;{{ ds }}&quot;</span>
<span class="s2"> echo &quot;{{ macros.ds_add(ds, 7)}}&quot;</span>
<span class="s2"> echo &quot;{{ params.my_param }}&quot;</span>
<span class="s2">{</span><span class="si">% e</span><span class="s2">ndfor %}</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">t3</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;templated&#39;</span><span class="p">,</span>
<span class="n">depends_on_past</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="n">templated_command</span><span class="p">,</span>
<span class="n">params</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;my_param&#39;</span><span class="p">:</span> <span class="s1">&#39;Parameter I passed in&#39;</span><span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
</pre></div>
</div>
</div>
<p>Notice that the <code class="docutils literal notranslate"><span class="pre">templated_command</span></code> contains code logic in <code class="docutils literal notranslate"><span class="pre">{%</span> <span class="pre">%}</span></code> blocks,
references parameters like <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">ds</span> <span class="pre">}}</span></code>, calls a function as in
<code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">macros.ds_add(ds,</span> <span class="pre">7)}}</span></code>, and references a user-defined parameter
in <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">params.my_param</span> <span class="pre">}}</span></code>.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">params</span></code> hook in <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code> allows you to pass a dictionary of
parameters and/or objects to your templates. Please take the time
to understand how the parameter <code class="docutils literal notranslate"><span class="pre">my_param</span></code> makes it through to the template.</p>
<p>Files can also be passed to the <code class="docutils literal notranslate"><span class="pre">bash_command</span></code> argument, like
<code class="docutils literal notranslate"><span class="pre">bash_command='templated_command.sh'</span></code>, where the file location is relative to
the directory containing the pipeline file (<code class="docutils literal notranslate"><span class="pre">tutorial.py</span></code> in this case). This
may be desirable for many reasons, like separating your script’s logic and
pipeline code, allowing for proper code highlighting in files composed in
different languages, and general flexibility in structuring pipelines. It is
also possible to define your <code class="docutils literal notranslate"><span class="pre">template_searchpath</span></code> as pointing to any folder
locations in the DAG constructor call.</p>
<p>Using that same DAG constructor call, it is possible to define
<code class="docutils literal notranslate"><span class="pre">user_defined_macros</span></code> which allow you to specify your own variables.
For example, passing <code class="docutils literal notranslate"><span class="pre">dict(foo='bar')</span></code> to this argument allows you
to use <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">foo</span> <span class="pre">}}</span></code> in your templates. Moreover, specifying
<code class="docutils literal notranslate"><span class="pre">user_defined_filters</span></code> allows you to register your own filters. For example,
passing <code class="docutils literal notranslate"><span class="pre">dict(hello=lambda</span> <span class="pre">name:</span> <span class="pre">'Hello</span> <span class="pre">%s'</span> <span class="pre">%</span> <span class="pre">name)</span></code> to this argument allows
you to use <code class="docutils literal notranslate"><span class="pre">{{</span> <span class="pre">'world'</span> <span class="pre">|</span> <span class="pre">hello</span> <span class="pre">}}</span></code> in your templates. For more information
regarding custom filters, have a look at the
<a class="reference external" href="http://jinja.pocoo.org/docs/dev/api/#writing-filters">Jinja Documentation</a>.</p>
<p>For more information on the variables and macros that can be referenced
in templates, make sure to read through the <a class="reference internal" href="macros.html"><span class="doc">Macros reference</span></a>.</p>
</div>
<div class="section" id="adding-dag-and-tasks-documentation">
<h2>Adding DAG and Tasks documentation<a class="headerlink" href="#adding-dag-and-tasks-documentation" title="Permalink to this headline"></a></h2>
<p>We can add documentation for the DAG or for each individual task. DAG documentation only supports
Markdown so far, while task documentation supports plain text, Markdown, reStructuredText,
JSON, and YAML.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dag</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="vm">__doc__</span>
<span class="n">t1</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span><span class="se">\</span>
<span class="s2">#### Task Documentation</span>
<span class="s2">You can document your task using the attributes `doc_md` (markdown),</span>
<span class="s2">`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets</span>
<span class="s2">rendered in the UI&#39;s Task Instance Details page.</span>
<span class="s2">![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import</span><span class="si">%20s</span><span class="s2">oul.png)</span>
<span class="s2">&quot;&quot;&quot;</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="id1">
<h2>Adding DAG and Tasks documentation<a class="headerlink" href="#id1" title="Permalink to this headline"></a></h2>
<p>We can add documentation for the DAG or for each individual task. DAG documentation only supports
Markdown so far, while task documentation supports plain text, Markdown, reStructuredText,
JSON, and YAML.</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dag</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="vm">__doc__</span>
<span class="n">t1</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span><span class="se">\</span>
<span class="s2">#### Task Documentation</span>
<span class="s2">You can document your task using the attributes `doc_md` (markdown),</span>
<span class="s2">`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets</span>
<span class="s2">rendered in the UI&#39;s Task Instance Details page.</span>
<span class="s2">![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import</span><span class="si">%20s</span><span class="s2">oul.png)</span>
<span class="s2">&quot;&quot;&quot;</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="setting-up-dependencies">
<h2>Setting up Dependencies<a class="headerlink" href="#setting-up-dependencies" title="Permalink to this headline"></a></h2>
<p>We have tasks <code class="docutils literal notranslate"><span class="pre">t1</span></code>, <code class="docutils literal notranslate"><span class="pre">t2</span></code> and <code class="docutils literal notranslate"><span class="pre">t3</span></code> that do not depend on each other. Here are a few ways
you can define dependencies between them:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span><span class="o">.</span><span class="n">set_downstream</span><span class="p">(</span><span class="n">t2</span><span class="p">)</span>
<span class="c1"># This means that t2 will depend on t1</span>
<span class="c1"># running successfully to run.</span>
<span class="c1"># It is equivalent to:</span>
<span class="n">t2</span><span class="o">.</span><span class="n">set_upstream</span><span class="p">(</span><span class="n">t1</span><span class="p">)</span>
<span class="c1"># The bit shift operator can also be</span>
<span class="c1"># used to chain operations:</span>
<span class="n">t1</span> <span class="o">&gt;&gt;</span> <span class="n">t2</span>
<span class="c1"># And the upstream dependency with the</span>
<span class="c1"># bit shift operator:</span>
<span class="n">t2</span> <span class="o">&lt;&lt;</span> <span class="n">t1</span>
<span class="c1"># Chaining multiple dependencies becomes</span>
<span class="c1"># concise with the bit shift operator:</span>
<span class="n">t1</span> <span class="o">&gt;&gt;</span> <span class="n">t2</span> <span class="o">&gt;&gt;</span> <span class="n">t3</span>
<span class="c1"># A list of tasks can also be set as</span>
<span class="c1"># dependencies. These operations</span>
<span class="c1"># all have the same effect:</span>
<span class="n">t1</span><span class="o">.</span><span class="n">set_downstream</span><span class="p">([</span><span class="n">t2</span><span class="p">,</span> <span class="n">t3</span><span class="p">])</span>
<span class="n">t1</span> <span class="o">&gt;&gt;</span> <span class="p">[</span><span class="n">t2</span><span class="p">,</span> <span class="n">t3</span><span class="p">]</span>
<span class="p">[</span><span class="n">t2</span><span class="p">,</span> <span class="n">t3</span><span class="p">]</span> <span class="o">&lt;&lt;</span> <span class="n">t1</span>
</pre></div>
</div>
<p>Note that when executing your script, Airflow will raise exceptions when
it finds cycles in your DAG or when a dependency is referenced more
than once.</p>
</div>
<div class="section" id="recap">
<h2>Recap<a class="headerlink" href="#recap" title="Permalink to this headline"></a></h2>
<p>Alright, so we have a pretty basic DAG. At this point your code should look
something like this:</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/example_dags/tutorial.py</span><a class="example-header-button viewcode-button reference internal" href="_modules/airflow/example_dags/tutorial.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">timedelta</span>
<span class="c1"># The DAG object; we&#39;ll need this to instantiate a DAG</span>
<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">DAG</span>
<span class="c1"># Operators; we need this to operate!</span>
<span class="kn">from</span> <span class="nn">airflow.operators.bash_operator</span> <span class="kn">import</span> <span class="n">BashOperator</span>
<span class="kn">from</span> <span class="nn">airflow.utils.dates</span> <span class="kn">import</span> <span class="n">days_ago</span>
<span class="c1"># These args will get passed on to each operator</span>
<span class="c1"># You can override them on a per-task basis during operator initialization</span>
<span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;owner&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">,</span>
<span class="s1">&#39;depends_on_past&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;start_date&#39;</span><span class="p">:</span> <span class="n">days_ago</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span>
<span class="s1">&#39;email&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;airflow@example.com&#39;</span><span class="p">],</span>
<span class="s1">&#39;email_on_failure&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;email_on_retry&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;retries&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;retry_delay&#39;</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">5</span><span class="p">),</span>
<span class="c1"># &#39;queue&#39;: &#39;bash_queue&#39;,</span>
<span class="c1"># &#39;pool&#39;: &#39;backfill&#39;,</span>
<span class="c1"># &#39;priority_weight&#39;: 10,</span>
<span class="c1"># &#39;end_date&#39;: datetime(2016, 1, 1),</span>
<span class="c1"># &#39;wait_for_downstream&#39;: False,</span>
<span class="c1"># &#39;dag&#39;: dag,</span>
<span class="c1"># &#39;sla&#39;: timedelta(hours=2),</span>
<span class="c1"># &#39;execution_timeout&#39;: timedelta(seconds=300),</span>
<span class="c1"># &#39;on_failure_callback&#39;: some_function,</span>
<span class="c1"># &#39;on_success_callback&#39;: some_other_function,</span>
<span class="c1"># &#39;on_retry_callback&#39;: another_function,</span>
<span class="c1"># &#39;sla_miss_callback&#39;: yet_another_function,</span>
<span class="c1"># &#39;trigger_rule&#39;: &#39;all_success&#39;</span>
<span class="p">}</span>
<span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span>
<span class="s1">&#39;tutorial&#39;</span><span class="p">,</span>
<span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span><span class="s1">&#39;A simple tutorial DAG&#39;</span><span class="p">,</span>
<span class="n">schedule_interval</span><span class="o">=</span><span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
<span class="p">)</span>
<span class="c1"># t1, t2 and t3 are examples of tasks created by instantiating operators</span>
<span class="n">t1</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;print_date&#39;</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="s1">&#39;date&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">t2</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;sleep&#39;</span><span class="p">,</span>
<span class="n">depends_on_past</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="s1">&#39;sleep 5&#39;</span><span class="p">,</span>
<span class="n">retries</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">dag</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="vm">__doc__</span>
<span class="n">t1</span><span class="o">.</span><span class="n">doc_md</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span><span class="se">\</span>
<span class="s2">#### Task Documentation</span>
<span class="s2">You can document your task using the attributes `doc_md` (markdown),</span>
<span class="s2">`doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets</span>
<span class="s2">rendered in the UI&#39;s Task Instance Details page.</span>
<span class="s2">![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import</span><span class="si">%20s</span><span class="s2">oul.png)</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">templated_command</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">{</span><span class="si">% f</span><span class="s2">or i in range(5) %}</span>
<span class="s2"> echo &quot;{{ ds }}&quot;</span>
<span class="s2"> echo &quot;{{ macros.ds_add(ds, 7)}}&quot;</span>
<span class="s2"> echo &quot;{{ params.my_param }}&quot;</span>
<span class="s2">{</span><span class="si">% e</span><span class="s2">ndfor %}</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">t3</span> <span class="o">=</span> <span class="n">BashOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;templated&#39;</span><span class="p">,</span>
<span class="n">depends_on_past</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">bash_command</span><span class="o">=</span><span class="n">templated_command</span><span class="p">,</span>
<span class="n">params</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;my_param&#39;</span><span class="p">:</span> <span class="s1">&#39;Parameter I passed in&#39;</span><span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">t1</span> <span class="o">&gt;&gt;</span> <span class="p">[</span><span class="n">t2</span><span class="p">,</span> <span class="n">t3</span><span class="p">]</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="testing">
<span id="id2"></span><h2>Testing<a class="headerlink" href="#testing" title="Permalink to this headline"></a></h2>
<div class="section" id="running-the-script">
<h3>Running the Script<a class="headerlink" href="#running-the-script" title="Permalink to this headline"></a></h3>
<p>Time to run some tests. First, let’s make sure the pipeline
is parsed successfully.</p>
<p>Let’s assume we’re saving the code from the previous step in
<code class="docutils literal notranslate"><span class="pre">tutorial.py</span></code> in the DAGs folder referenced in your <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code>.
The default location for your DAGs is <code class="docutils literal notranslate"><span class="pre">~/airflow/dags</span></code>.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python ~/airflow/dags/tutorial.py
</pre></div>
</div>
<p>If the script does not raise an exception it means that you haven’t done
anything horribly wrong, and that your Airflow environment is somewhat
sound.</p>
</div>
<div class="section" id="command-line-metadata-validation">
<h3>Command Line Metadata Validation<a class="headerlink" href="#command-line-metadata-validation" title="Permalink to this headline"></a></h3>
<p>Let’s run a few commands to validate this script further.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># print the list of active DAGs</span>
airflow list_dags
<span class="c1"># prints the list of tasks in the &quot;tutorial&quot; DAG</span>
airflow list_tasks tutorial
<span class="c1"># prints the hierarchy of tasks in the tutorial DAG</span>
airflow list_tasks tutorial --tree
</pre></div>
</div>
</div>
<div class="section" id="id3">
<h3>Testing<a class="headerlink" href="#id3" title="Permalink to this headline"></a></h3>
<p>Let’s test by running the actual task instances for a specific date. The
date specified in this context is called <code class="docutils literal notranslate"><span class="pre">execution_date</span></code>. This is the
<em>logical</em> date, which simulates the scheduler running your task or dag at
a specific date and time, even though it <em>physically</em> will run now
(or as soon as its dependencies are met).</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># command layout: command subcommand dag_id task_id date</span>
<span class="c1"># testing print_date</span>
airflow <span class="nb">test</span> tutorial print_date <span class="m">2015</span>-06-01
<span class="c1"># testing sleep</span>
airflow <span class="nb">test</span> tutorial sleep <span class="m">2015</span>-06-01
</pre></div>
</div>
<p>Now remember what we did with templating earlier? See how this template
gets rendered and executed by running this command:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># testing templated</span>
airflow <span class="nb">test</span> tutorial templated <span class="m">2015</span>-06-01
</pre></div>
</div>
<p>This should result in displaying a verbose log of events and ultimately
running your bash command and printing the result.</p>
<p>Note that the <code class="docutils literal notranslate"><span class="pre">airflow</span> <span class="pre">test</span></code> command runs task instances locally, outputs
their log to stdout (on screen), doesn’t bother with dependencies, and
doesn’t communicate state (running, success, failed, …) to the database.
It simply allows testing a single task instance.</p>
</div>
<div class="section" id="backfill">
<h3>Backfill<a class="headerlink" href="#backfill" title="Permalink to this headline"></a></h3>
<p>Everything looks like it’s running fine so let’s run a backfill.
<code class="docutils literal notranslate"><span class="pre">backfill</span></code> will respect your dependencies, emit logs into files and talk to
the database to record status. If you do have a webserver up, you’ll be able
to track the progress. <code class="docutils literal notranslate"><span class="pre">airflow</span> <span class="pre">webserver</span></code> will start a web server if you
are interested in tracking the progress visually as your backfill progresses.</p>
<p>Note that if you use <code class="docutils literal notranslate"><span class="pre">depends_on_past=True</span></code>, individual task instances
will depend on the success of their previous task instance (that is, previous
according to <code class="docutils literal notranslate"><span class="pre">execution_date</span></code>). Task instances with <code class="docutils literal notranslate"><span class="pre">execution_date==start_date</span></code>
will disregard this dependency because there would be no
past task instances created for them.</p>
<p>You may also want to consider <code class="docutils literal notranslate"><span class="pre">wait_for_downstream=True</span></code> when using <code class="docutils literal notranslate"><span class="pre">depends_on_past=True</span></code>.
While <code class="docutils literal notranslate"><span class="pre">depends_on_past=True</span></code> causes a task instance to depend on the success
of its previous task_instance, <code class="docutils literal notranslate"><span class="pre">wait_for_downstream=True</span></code> will cause a task instance
to also wait for all task instances <em>immediately downstream</em> of the previous
task instance to succeed.</p>
<p>The date range in this context is a <code class="docutils literal notranslate"><span class="pre">start_date</span></code> and optionally an <code class="docutils literal notranslate"><span class="pre">end_date</span></code>,
which are used to populate the run schedule with task instances from this dag.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># optional, start a web server in debug mode in the background</span>
<span class="c1"># airflow webserver --debug &amp;</span>
<span class="c1"># start your backfill on a date range</span>
airflow backfill tutorial -s <span class="m">2015</span>-06-01 -e <span class="m">2015</span>-06-07
</pre></div>
</div>
</div>
</div>
<div class="section" id="what-s-next">
<h2>What’s Next?<a class="headerlink" href="#what-s-next" title="Permalink to this headline"></a></h2>
<p>That’s it, you’ve written, tested and backfilled your very first Airflow
pipeline. Merging your code into a code repository that has a master scheduler
running against it should get it triggered and run every day.</p>
<p>Here are a few things you might want to do next:</p>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<ul class="simple">
<li><p>Read the <a class="reference internal" href="concepts.html#concepts"><span class="std std-ref">Concepts page</span></a> for detailed explanation
of Airflow concepts such as DAGs, Tasks, Operators, etc.</p></li>
<li><p>Take an in-depth tour of the UI - click all the things!</p></li>
<li><p>Keep reading the docs!</p>
<ul>
<li><p>Review the <a class="reference internal" href="howto/index.html"><span class="doc">how-to guides</span></a>, which include a guide to writing your own operator</p></li>
<li><p>Review the <a class="reference internal" href="cli.html#cli"><span class="std std-ref">Command Line Interface Reference</span></a></p></li>
<li><p>Review the <a class="reference internal" href="_api/index.html#pythonapi-operators"><span class="std std-ref">List of operators</span></a></p></li>
<li><p>Review the <a class="reference internal" href="macros.html#macros"><span class="std std-ref">Macros reference</span></a></p></li>
</ul>
</li>
<li><p>Write your first pipeline!</p></li>
</ul>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="howto/index.html" class="btn btn-neutral float-right" title="How-to Guides" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="installation.html" class="btn btn-neutral float-left" title="Installation" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
<div class="footer">This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
the JavaScript coming from www.google-analytics.com. Check our
<a href="privacy_notice.html">Privacy Policy</a>
for more details.
</div>
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Once the DOM is ready, switch on the theme's navigation behaviour.
// NOTE(review): the `true` argument is presumably the theme's sticky-nav
// flag; confirm against _static/js/theme.js.
jQuery(document).ready(function () {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
<!-- Theme Analytics -->
<script>
// Google Analytics (analytics.js) asynchronous loader. The IIFE below:
//  - records the global name 'ga' in window['GoogleAnalyticsObject'];
//  - defines window.ga, unless it already exists, as a stub that pushes each
//    call's arguments onto the ga.q array;
//  - stamps ga.l with the current time as a number (1*new Date());
//  - creates a script element with async set and src pointing at analytics.js,
//    and inserts it before the first script element already in the document.
// NOTE(review): 'UA-…' IDs belong to Universal Analytics, which Google has
// reportedly retired; confirm this snippet still collects data.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Issue the 'create' command for property 'UA-140539454-1' with the 'auto' argument.
ga('create', 'UA-140539454-1', 'auto');
// Issue the 'send' command with 'pageview' for the current page.
ga('send', 'pageview');
</script>
</body>
</html>