blob: 622a2d6fd6d7af8290c234194c691e76890ec9b3 [file] [log] [blame]
<!--
Javascript to render AIRFLOW-XXX and PR references in text
as HTML links.
Overrides extrahead block from sphinx_rtd_theme
https://www.sphinx-doc.org/en/master/templating.html
-->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>DAG Serialization &mdash; Airflow Documentation</title>
<link rel="shortcut icon" href="_static/pin_32.png"/>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script type="text/javascript" src="_static/jira-links.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/graphviz.css" type="text/css" />
<link rel="stylesheet" href="_static/exampleinclude.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Changelog" href="changelog.html" />
<link rel="prev" title="Lineage" href="lineage.html" />
<script>
</script>
<style>
</style>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Airflow
</a>
<div class="version">
1.10.8
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="executor/index.html">Executor</a></li>
<li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="api.html">REST API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="integration.html">Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="errors.html">Error Tracking</a></li>
<li class="toctree-l1"><a class="reference internal" href="kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">DAG Serialization</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#enable-dag-serialization">Enable Dag Serialization</a></li>
<li class="toctree-l2"><a class="reference internal" href="#limitations">Limitations</a></li>
<li class="toctree-l2"><a class="reference internal" href="#using-a-different-json-library">Using a different JSON Library</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="best-practices.html">Best Practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="macros.html">Macros reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="privacy_notice.html">Privacy Notice</a></li>
</ul>
<p class="caption"><span class="caption-text">References</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="_api/index.html">Python API</a></li>
<li class="toctree-l1"><a class="reference internal" href="configurations-ref.html">Configurations</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li>DAG Serialization</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/dag-serialization.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<blockquote>
<div></div></blockquote>
<div class="section" id="dag-serialization">
<h1>DAG Serialization<a class="headerlink" href="#dag-serialization" title="Permalink to this headline"></a></h1>
<p>In order to make Airflow Webserver stateless (almost!), Airflow &gt;=1.10.7 supports
DAG Serialization and DB Persistence.</p>
<img alt="_images/dag_serialization.png" src="_images/dag_serialization.png" />
<p>Without DAG Serialization &amp; persistence in DB, the Webserver and the Scheduler both
needs access to the DAG files. Both the scheduler and webserver parses the DAG files.</p>
<p>With <strong>DAG Serialization</strong> we aim to decouple the webserver from DAG parsing
which would make the Webserver very light-weight.</p>
<p>As shown in the image above, when using the this feature,
the Scheduler parses the DAG files, serializes them in JSON format and saves them in the Metadata DB.</p>
<p>The Webserver now instead of having to parse the DAG file again, reads the
serialized DAGs in JSON, de-serializes them and create the DagBag and uses it
to show in the UI.</p>
<p>One of the key features that is implemented as the part of DAG Serialization is that
instead of loading an entire DagBag when the WebServer starts we only load each DAG on demand from the
Serialized Dag table. This helps reduce Webserver startup time and memory. The reduction is notable
when you have large number of DAGs.</p>
<p>Below is the screenshot of the <code class="docutils literal notranslate"><span class="pre">serialized_dag</span></code> table in Metadata DB:</p>
<img alt="_images/serialized_dag_table.png" src="_images/serialized_dag_table.png" />
<div class="section" id="enable-dag-serialization">
<h2>Enable Dag Serialization<a class="headerlink" href="#enable-dag-serialization" title="Permalink to this headline"></a></h2>
<p>Add the following settings in <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code>:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[core]</span>
<span class="na">store_serialized_dags</span> <span class="o">=</span> <span class="s">True</span>
<span class="na">min_serialized_dag_update_interval</span> <span class="o">=</span> <span class="s">30</span>
</pre></div>
</div>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">store_serialized_dags</span></code>: This flag decides whether to serialises DAGs and persist them in DB.
If set to True, Webserver reads from DB instead of parsing DAG files</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">min_serialized_dag_update_interval</span></code>: This flag sets the minimum interval (in seconds) after which
the serialized DAG in DB should be updated. This helps in reducing database write rate.</p></li>
</ul>
<p>If you are updating Airflow from &lt;1.10.7, please do not forget to run <code class="docutils literal notranslate"><span class="pre">airflow</span> <span class="pre">db</span> <span class="pre">upgrade</span></code>.</p>
</div>
<div class="section" id="limitations">
<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this headline"></a></h2>
<p>The Webserver will still need access to DAG files in the following cases,
which is why we said “almost” stateless.</p>
<ul class="simple">
<li><p><strong>Rendered Template</strong> tab will still have to parse Python file as it needs all the details like
the execution date and even the data passed by the upstream task using Xcom.</p></li>
<li><p><strong>Code View</strong> will read the DAG File &amp; show it using Pygments.
However, it does not need to Parse the Python file so it is still a small operation.</p></li>
</ul>
</div>
<div class="section" id="using-a-different-json-library">
<h2>Using a different JSON Library<a class="headerlink" href="#using-a-different-json-library" title="Permalink to this headline"></a></h2>
<p>To use a different JSON library instead of the standard <code class="docutils literal notranslate"><span class="pre">json</span></code> library like <code class="docutils literal notranslate"><span class="pre">ujson</span></code>, you need to
define a <code class="docutils literal notranslate"><span class="pre">json</span></code> variable in local Airflow settings (<code class="docutils literal notranslate"><span class="pre">airflow_local_settings.py</span></code>) file as follows:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">ujson</span>
<span class="n">json</span> <span class="o">=</span> <span class="n">ujson</span>
</pre></div>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="changelog.html" class="btn btn-neutral float-right" title="Changelog" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="lineage.html" class="btn btn-neutral float-left" title="Lineage" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
<div class="footer">This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
the JavaScript coming from www.google-analytics.com. Check our
<a href="privacy_notice.html">Privacy Policy</a>
for more details.
</div>
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<!-- Theme Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-140539454-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>