blob: 5420c6b2065ebccee14efe84c04b8f5fa96eb938 [file] [log] [blame]
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Writing Logs &mdash; Airflow Documentation</title>
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Scaling Out with Celery" href="executor/use-celery.html" />
<link rel="prev" title="Securing Connections" href="secure-connections.html" />
<script src="../_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="../index.html" class="icon icon-home"> Airflow
</a>
<div class="version">
1.10.2
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html">How-to Guides</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="set-config.html">Setting Configuration Options</a></li>
<li class="toctree-l2"><a class="reference internal" href="initialize-database.html">Initializing a Database Backend</a></li>
<li class="toctree-l2"><a class="reference internal" href="operator.html">Using Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="manage-connections.html">Managing Connections</a></li>
<li class="toctree-l2"><a class="reference internal" href="secure-connections.html">Securing Connections</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Writing Logs</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#writing-logs-locally">Writing Logs Locally</a></li>
<li class="toctree-l3"><a class="reference internal" href="#writing-logs-to-amazon-s3">Writing Logs to Amazon S3</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#before-you-begin">Before you begin</a></li>
<li class="toctree-l4"><a class="reference internal" href="#enabling-remote-logging">Enabling remote logging</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#writing-logs-to-azure-blob-storage">Writing Logs to Azure Blob Storage</a></li>
<li class="toctree-l3"><a class="reference internal" href="#writing-logs-to-google-cloud-storage">Writing Logs to Google Cloud Storage</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="executor/use-celery.html">Scaling Out with Celery</a></li>
<li class="toctree-l2"><a class="reference internal" href="executor/use-dask.html">Scaling Out with Dask</a></li>
<li class="toctree-l2"><a class="reference internal" href="executor/use-mesos.html">Scaling Out with Mesos (community contributed)</a></li>
<li class="toctree-l2"><a class="reference internal" href="run-with-systemd.html">Running Airflow with systemd</a></li>
<li class="toctree-l2"><a class="reference internal" href="run-with-upstart.html">Running Airflow with upstart</a></li>
<li class="toctree-l2"><a class="reference internal" href="use-test-config.html">Using the Test Mode Configuration</a></li>
<li class="toctree-l2"><a class="reference internal" href="check-health.html">Checking Airflow Health Status</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="../timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api.html">Experimental Rest API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../integration.html">Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="../metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="../kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html">Docs</a> &raquo;</li>
<li><a href="index.html">How-to Guides</a> &raquo;</li>
<li>Writing Logs</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/howto/write-logs.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="writing-logs">
<h1>Writing Logs<a class="headerlink" href="#writing-logs" title="Permalink to this headline"></a></h1>
<div class="section" id="writing-logs-locally">
<h2>Writing Logs Locally<a class="headerlink" href="#writing-logs-locally" title="Permalink to this headline"></a></h2>
<p>Users can specify a logs folder in <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code> using the
<code class="docutils literal notranslate"><span class="pre">base_log_folder</span></code> setting. By default, it is in the <code class="docutils literal notranslate"><span class="pre">AIRFLOW_HOME</span></code>
directory.</p>
<p>In addition, users can supply a remote location for storing logs and log
backups in cloud storage.</p>
<p>In the Airflow Web UI, local logs take precedence over remote logs. If local logs
can not be found or accessed, the remote logs will be displayed. Note that logs
are only sent to remote storage once a task completes (including failure). In other
words, remote logs for running tasks are unavailable. Logs are stored in the log
folder as <code class="docutils literal notranslate"><span class="pre">{dag_id}/{task_id}/{execution_date}/{try_number}.log</span></code>.</p>
</div>
<div class="section" id="writing-logs-to-amazon-s3">
<span id="write-logs-amazon"></span><h2>Writing Logs to Amazon S3<a class="headerlink" href="#writing-logs-to-amazon-s3" title="Permalink to this headline"></a></h2>
<div class="section" id="before-you-begin">
<h3>Before you begin<a class="headerlink" href="#before-you-begin" title="Permalink to this headline"></a></h3>
<p>Remote logging uses an existing Airflow connection to read/write logs. If you
don’t have a connection properly setup, this will fail.</p>
</div>
<div class="section" id="enabling-remote-logging">
<h3>Enabling remote logging<a class="headerlink" href="#enabling-remote-logging" title="Permalink to this headline"></a></h3>
<p>To enable this feature, <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code> must be configured as in this
example:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="o">[</span>core<span class="o">]</span>
<span class="c1"># Airflow can store logs remotely in AWS S3. Users must supply a remote</span>
<span class="c1"># location URL (starting with either &#39;s3://...&#39;) and an Airflow connection</span>
<span class="c1"># id that provides access to the storage location.</span>
<span class="nv">remote_logging</span> <span class="o">=</span> True
<span class="nv">remote_base_log_folder</span> <span class="o">=</span> s3://my-bucket/path/to/logs
<span class="nv">remote_log_conn_id</span> <span class="o">=</span> MyS3Conn
<span class="c1"># Use server-side encryption for logs stored in S3</span>
<span class="nv">encrypt_s3_logs</span> <span class="o">=</span> False
</pre></div>
</div>
<p>In the above example, Airflow will try to use <code class="docutils literal notranslate"><span class="pre">S3Hook('MyS3Conn')</span></code>.</p>
</div>
</div>
<div class="section" id="writing-logs-to-azure-blob-storage">
<span id="write-logs-azure"></span><h2>Writing Logs to Azure Blob Storage<a class="headerlink" href="#writing-logs-to-azure-blob-storage" title="Permalink to this headline"></a></h2>
<p>Airflow can be configured to read and write task logs in Azure Blob Storage.
Follow the steps below to enable Azure Blob Storage logging.</p>
<ol class="arabic">
<li><p class="first">Airflow’s logging system requires a custom .py file to be located in the <code class="docutils literal notranslate"><span class="pre">PYTHONPATH</span></code>, so that it’s importable from Airflow. Start by creating a directory to store the config file. <code class="docutils literal notranslate"><span class="pre">$AIRFLOW_HOME/config</span></code> is recommended.</p>
</li>
<li><p class="first">Create empty files called <code class="docutils literal notranslate"><span class="pre">$AIRFLOW_HOME/config/log_config.py</span></code> and <code class="docutils literal notranslate"><span class="pre">$AIRFLOW_HOME/config/__init__.py</span></code>.</p>
</li>
<li><p class="first">Copy the contents of <code class="docutils literal notranslate"><span class="pre">airflow/config_templates/airflow_local_settings.py</span></code> into the <code class="docutils literal notranslate"><span class="pre">log_config.py</span></code> file that was just created in the step above.</p>
</li>
<li><p class="first">Customize the following portions of the template:</p>
<blockquote>
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># wasb buckets should start with &quot;wasb&quot; just to help Airflow select correct handler</span>
<span class="nv">REMOTE_BASE_LOG_FOLDER</span> <span class="o">=</span> <span class="s1">&#39;wasb-&lt;whatever you want here&gt;&#39;</span>
<span class="c1"># Rename DEFAULT_LOGGING_CONFIG to LOGGING CONFIG</span>
<span class="nv">LOGGING_CONFIG</span> <span class="o">=</span> ...
</pre></div>
</div>
</div></blockquote>
</li>
<li><p class="first">Make sure a Azure Blob Storage (Wasb) connection hook has been defined in Airflow. The hook should have read and write access to the Azure Blob Storage bucket defined above in <code class="docutils literal notranslate"><span class="pre">REMOTE_BASE_LOG_FOLDER</span></code>.</p>
</li>
<li><p class="first">Update <code class="docutils literal notranslate"><span class="pre">$AIRFLOW_HOME/airflow.cfg</span></code> to contain:</p>
<blockquote>
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nv">remote_logging</span> <span class="o">=</span> True
<span class="nv">logging_config_class</span> <span class="o">=</span> log_config.LOGGING_CONFIG
<span class="nv">remote_log_conn_id</span> <span class="o">=</span> &lt;name of the Azure Blob Storage connection&gt;
</pre></div>
</div>
</div></blockquote>
</li>
<li><p class="first">Restart the Airflow webserver and scheduler, and trigger (or wait for) a new task execution.</p>
</li>
<li><p class="first">Verify that logs are showing up for newly executed tasks in the bucket you’ve defined.</p>
</li>
</ol>
</div>
<div class="section" id="writing-logs-to-google-cloud-storage">
<span id="write-logs-gcp"></span><h2>Writing Logs to Google Cloud Storage<a class="headerlink" href="#writing-logs-to-google-cloud-storage" title="Permalink to this headline"></a></h2>
<p>Follow the steps below to enable Google Cloud Storage logging.</p>
<p>To enable this feature, <code class="docutils literal notranslate"><span class="pre">airflow.cfg</span></code> must be configured as in this
example:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="o">[</span>core<span class="o">]</span>
<span class="c1"># Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search.</span>
<span class="c1"># Users must supply an Airflow connection id that provides access to the storage</span>
<span class="c1"># location. If remote_logging is set to true, see UPDATING.md for additional</span>
<span class="c1"># configuration requirements.</span>
<span class="nv">remote_logging</span> <span class="o">=</span> True
<span class="nv">remote_base_log_folder</span> <span class="o">=</span> gs://my-bucket/path/to/logs
<span class="nv">remote_log_conn_id</span> <span class="o">=</span> MyGCSConn
</pre></div>
</div>
<ol class="arabic">
<li><p class="first">Install the <code class="docutils literal notranslate"><span class="pre">gcp_api</span></code> package first, like so: <code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">apache-airflow[gcp_api]</span></code>.</p>
</li>
<li><p class="first">Make sure a Google Cloud Platform connection hook has been defined in Airflow. The hook should have read and write access to the Google Cloud Storage bucket defined above in <code class="docutils literal notranslate"><span class="pre">remote_base_log_folder</span></code>.</p>
</li>
<li><p class="first">Restart the Airflow webserver and scheduler, and trigger (or wait for) a new task execution.</p>
</li>
<li><p class="first">Verify that logs are showing up for newly executed tasks in the bucket you’ve defined.</p>
</li>
<li><p class="first">Verify that the Google Cloud Storage viewer is working in the UI. Pull up a newly executed task, and verify that you see something like:</p>
<blockquote>
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>*** Reading remote log from gs://&lt;bucket where logs should be persisted&gt;/example_bash_operator/run_this_last/2017-10-03T00:00:00/16.log.
<span class="o">[</span><span class="m">2017</span>-10-03 <span class="m">21</span>:57:50,056<span class="o">]</span> <span class="o">{</span>cli.py:377<span class="o">}</span> INFO - Running on host chrisr-00532
<span class="o">[</span><span class="m">2017</span>-10-03 <span class="m">21</span>:57:50,093<span class="o">]</span> <span class="o">{</span>base_task_runner.py:115<span class="o">}</span> INFO - Running: <span class="o">[</span><span class="s1">&#39;bash&#39;</span>, <span class="s1">&#39;-c&#39;</span>, u<span class="s1">&#39;airflow run example_bash_operator run_this_last 2017-10-03T00:00:00 --job_id 47 --raw -sd DAGS_FOLDER/example_dags/example_bash_operator.py&#39;</span><span class="o">]</span>
<span class="o">[</span><span class="m">2017</span>-10-03 <span class="m">21</span>:57:51,264<span class="o">]</span> <span class="o">{</span>base_task_runner.py:98<span class="o">}</span> INFO - Subtask: <span class="o">[</span><span class="m">2017</span>-10-03 <span class="m">21</span>:57:51,263<span class="o">]</span> <span class="o">{</span>__init__.py:45<span class="o">}</span> INFO - Using executor SequentialExecutor
<span class="o">[</span><span class="m">2017</span>-10-03 <span class="m">21</span>:57:51,306<span class="o">]</span> <span class="o">{</span>base_task_runner.py:98<span class="o">}</span> INFO - Subtask: <span class="o">[</span><span class="m">2017</span>-10-03 <span class="m">21</span>:57:51,306<span class="o">]</span> <span class="o">{</span>models.py:186<span class="o">}</span> INFO - Filling up the DagBag from /airflow/dags/example_dags/example_bash_operator.py
</pre></div>
</div>
</div></blockquote>
</li>
</ol>
<p>Note the top line that says it’s reading from the remote log file.</p>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="executor/use-celery.html" class="btn btn-neutral float-right" title="Scaling Out with Celery" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="secure-connections.html" class="btn btn-neutral" title="Securing Connections" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script type="text/javascript" src="../_static/jquery.js"></script>
<script type="text/javascript" src="../_static/underscore.js"></script>
<script type="text/javascript" src="../_static/doctools.js"></script>
<script type="text/javascript" src="../_static/language_data.js"></script>
<script type="text/javascript" src="../_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>