blob: 6daeffb8349292159c782ac0c138639667ed73be [file] [log] [blame]
<!--
Javascript to render AIRFLOW-XXX and PR references in text
as HTML links.
Overrides extrahead block from sphinx_rtd_theme
https://www.sphinx-doc.org/en/master/templating.html
-->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Papermill &mdash; Airflow Documentation</title>
<link rel="shortcut icon" href="../../_static/pin_32.png"/>
<script type="text/javascript" src="../../_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../_static/jquery.js"></script>
<script type="text/javascript" src="../../_static/underscore.js"></script>
<script type="text/javascript" src="../../_static/doctools.js"></script>
<script type="text/javascript" src="../../_static/language_data.js"></script>
<script type="text/javascript" src="../../_static/js/theme.js"></script>
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/graphviz.css" type="text/css" />
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="PythonOperator" href="python.html" />
<link rel="prev" title="Google Cloud Vision Operators" href="gcp/vision.html" />
<script>
document.addEventListener('DOMContentLoaded', function() {
var el = document.getElementById('changelog');
if (el !== null ) {
// [AIRFLOW-...]
el.innerHTML = el.innerHTML.replace(
/\[(AIRFLOW-[\d]+)\]/g,
`<a href="https://issues.apache.org/jira/browse/$1">[$1]</a>`
);
// (#...)
el.innerHTML = el.innerHTML.replace(
/\(#([\d]+)\)/g,
`<a href="https://github.com/apache/airflow/pull/$1">(#$1)</a>`
);
};
})
</script>
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-140539454-1']);
_gaq.push(['_trackPageview']);
</script>
<style>
.example-header {
position: relative;
background: #9AAA7A;
padding: 8px 16px;
margin-bottom: 0;
}
.example-header--with-button {
padding-right: 166px;
}
.example-header:after{
content: '';
display: table;
clear: both;
}
.example-title {
display:block;
padding: 4px;
margin-right: 16px;
color: white;
overflow-x: auto;
}
.example-header-button {
top: 8px;
right: 16px;
position: absolute;
}
.example-header + .highlight-python {
margin-top: 0 !important;
}
.viewcode-button {
display: inline-block;
padding: 8px 16px;
border: 0;
margin: 0;
outline: 0;
border-radius: 2px;
-webkit-box-shadow: 0 3px 5px 0 rgba(0,0,0,.3);
box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
color: #404040;
background-color: #e7e7e7;
cursor: pointer;
font-size: 16px;
font-weight: 500;
line-height: 1;
text-decoration: none;
text-overflow: ellipsis;
overflow: hidden;
text-transform: uppercase;
-webkit-transition: background-color .2s;
transition: background-color .2s;
vertical-align: middle;
white-space: nowrap;
}
.viewcode-button:visited {
color: #404040;
}
.viewcode-button:hover, .viewcode-button:focus {
color: #404040;
background-color: #d6d6d6;
}
</style>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home"> Airflow
</a>
<div class="version">
1.10.6
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../tutorial.html">Tutorial</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">How-to Guides</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../set-config.html">Setting Configuration Options</a></li>
<li class="toctree-l2"><a class="reference internal" href="../initialize-database.html">Initializing a Database Backend</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="index.html">Using Operators</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="bash.html">BashOperator</a></li>
<li class="toctree-l3"><a class="reference internal" href="dingding.html">Dingding Operators</a></li>
<li class="toctree-l3"><a class="reference internal" href="gcp/index.html">Google Cloud Operators</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">Papermill</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#usage">Usage</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="python.html">PythonOperator</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../connection/index.html">Managing Connections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../secure-connections.html">Securing Connections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../secure-connections.html#rotating-encryption-keys">Rotating encryption keys</a></li>
<li class="toctree-l2"><a class="reference internal" href="../write-logs.html">Writing Logs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../executor/use-celery.html">Celery Executor</a></li>
<li class="toctree-l2"><a class="reference internal" href="../executor/use-dask.html">Dask Executor</a></li>
<li class="toctree-l2"><a class="reference internal" href="../executor/use-mesos.html">Scaling Out with Mesos (community contributed)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../run-behind-proxy.html">Running Airflow behind a reverse proxy</a></li>
<li class="toctree-l2"><a class="reference internal" href="../run-with-systemd.html">Running Airflow with systemd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../run-with-upstart.html">Running Airflow with upstart</a></li>
<li class="toctree-l2"><a class="reference internal" href="../use-test-config.html">Using the Test Mode Configuration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../check-health.html">Checking Airflow Health Status</a></li>
<li class="toctree-l2"><a class="reference internal" href="../define_extra_link.html">Define an operator extra link</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tracking-user-activity.html">Tracking User Activity</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cli-completion.html">Set Up Bash/Zsh Completion</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../cli.html">Command Line Interface Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">REST API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../integration.html">Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../errors.html">Error Tracking</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../macros.html">Macros reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../_api/index.html">Python API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../privacy_notice.html">Privacy Notice</a></li>
</ul>
<p class="caption"><span class="caption-text">References</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../_api/index.html">Python API</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html">Docs</a> &raquo;</li>
<li><a href="../index.html">How-to Guides</a> &raquo;</li>
<li><a href="index.html">Using Operators</a> &raquo;</li>
<li>Papermill</li>
<li class="wy-breadcrumbs-aside">
<a href="../../_sources/howto/operator/papermill.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<blockquote>
<div></div></blockquote>
<div class="section" id="papermill">
<h1>Papermill<a class="headerlink" href="#papermill" title="Permalink to this headline"></a></h1>
<p>Apache Airflow supports integration with <a class="reference external" href="https://papermill.readthedocs.io/en/latest/">Papermill</a>. Papermill is a tool for
parameterizing and executing Jupyter Notebooks. Perhaps you have a financial
report that you wish to run with different values on the first or last day of
a month or at the beginning or end of the year. Using <em>parameters</em> in your
notebook and using the <em>PapermillOperator</em> makes this a breeze.</p>
<div class="section" id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Permalink to this headline"></a></h2>
<div class="section" id="creating-a-notebook">
<h3>Creating a notebook<a class="headerlink" href="#creating-a-notebook" title="Permalink to this headline"></a></h3>
<p>To parameterize your notebook designate a cell with the tag parameters. Papermill
looks for the parameters cell and treats this cell as defaults for the parameters
passed in at execution time. Papermill will add a new cell tagged with injected-parameters
with input parameters in order to overwrite the values in parameters. If no cell is
tagged with parameters the injected cell will be inserted at the top of the notebook.</p>
<p>Note that Jupyter notebook has out of the box support for tags but you need to install
the celltags extension for Jupyter Lab: <code class="docutils literal notranslate"><span class="pre">jupyter</span> <span class="pre">labextension</span> <span class="pre">install</span> <span class="pre">&#64;jupyterlab/celltags</span></code></p>
<p>Make sure that you save your notebook somewhere so that Airflow can access it. Papermill
supports S3, GCS, Azure and Local. HDFS is <em>not</em> supported.</p>
</div>
<div class="section" id="example-dag">
<h3>Example DAG<a class="headerlink" href="#example-dag" title="Permalink to this headline"></a></h3>
<p>Use the <code class="xref py py-class docutils literal notranslate"><span class="pre">PapermillOperator</span></code>
to execute a jupyter notebook:</p>
<div class="example-block-wrapper docutils container">
<p class="example-header example-header--with-button"><span class="example-title">airflow/contrib/example_dags/example_papermill_operator.py</span><a class="example-header-button viewcode-button reference internal" href="../../_modules/airflow/contrib/example_dags/example_papermill_operator.html"><span>View Source</span></a></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">run_this</span> <span class="o">=</span> <span class="n">PapermillOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s2">&quot;run_example_notebook&quot;</span><span class="p">,</span>
<span class="n">input_nb</span><span class="o">=</span><span class="s2">&quot;/tmp/hello_world.ipynb&quot;</span><span class="p">,</span>
<span class="n">output_nb</span><span class="o">=</span><span class="s2">&quot;/tmp/out-{{ execution_date }}.ipynb&quot;</span><span class="p">,</span>
<span class="n">parameters</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;msgs&quot;</span><span class="p">:</span> <span class="s2">&quot;Ran from Airflow at {{ execution_date }}!&quot;</span><span class="p">}</span>
<span class="p">)</span>
</pre></div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="python.html" class="btn btn-neutral float-right" title="PythonOperator" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="gcp/vision.html" class="btn btn-neutral float-left" title="Google Cloud Vision Operators" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
<div class="footer">This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
the JavaScript coming from www.google-analytics.com. Check our
<a href="../../privacy_notice.html">Privacy Policy</a>
for more details.
<script type="text/javascript">
(function() {
var ga = document.createElement('script');
ga.src = ('https:' == document.location.protocol ?
'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
ga.setAttribute('async', 'true');
var nodes = document.documentElement.childNodes;
var i = -1;
var node;
do {
i++;
node = nodes[i]
} while(node.nodeType !== Node.ELEMENT_NODE);
node.appendChild(ga);
})();
</script>
</div>
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>