<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Integration &mdash; Airflow Documentation</title>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Lineage" href="lineage.html" />
<link rel="prev" title="Experimental Rest API" href="api.html" />
<script src="_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="index.html" class="icon icon-home"> Airflow
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="api.html">Experimental Rest API</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Integration</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#reverse-proxy">Reverse Proxy</a></li>
<li class="toctree-l2"><a class="reference internal" href="#azure-microsoft-azure">Azure: Microsoft Azure</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#azure-blob-storage">Azure Blob Storage</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#wasbblobsensor">WasbBlobSensor</a></li>
<li class="toctree-l4"><a class="reference internal" href="#wasbprefixsensor">WasbPrefixSensor</a></li>
<li class="toctree-l4"><a class="reference internal" href="#filetowasboperator">FileToWasbOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#wasbhook">WasbHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#azure-file-share">Azure File Share</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#azurefilesharehook">AzureFileShareHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#logging">Logging</a></li>
<li class="toctree-l3"><a class="reference internal" href="#azure-data-lake">Azure Data Lake</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#azuredatalakehook">AzureDataLakeHook</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#aws-amazon-web-services">AWS: Amazon Web Services</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#aws-emr">AWS EMR</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#emraddstepsoperator">EmrAddStepsOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#emrcreatejobflowoperator">EmrCreateJobFlowOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#emrterminatejobflowoperator">EmrTerminateJobFlowOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#emrhook">EmrHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-s3">AWS S3</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#s3hook">S3Hook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3filetransformoperator">S3FileTransformOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3listoperator">S3ListOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3togooglecloudstorageoperator">S3ToGoogleCloudStorageOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3tohivetransfer">S3ToHiveTransfer</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-ec2-container-service">AWS EC2 Container Service</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#ecsoperator">ECSOperator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-batch-service">AWS Batch Service</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#awsbatchoperator">AWSBatchOperator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-redshift">AWS RedShift</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#awsredshiftclustersensor">AwsRedshiftClusterSensor</a></li>
<li class="toctree-l4"><a class="reference internal" href="#redshifthook">RedshiftHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#redshifttos3transfer">RedshiftToS3Transfer</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3toredshifttransfer">S3ToRedshiftTransfer</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#databricks">Databricks</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#databrickssubmitrunoperator">DatabricksSubmitRunOperator</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#gcp-google-cloud-platform">GCP: Google Cloud Platform</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#id22">Logging</a></li>
<li class="toctree-l3"><a class="reference internal" href="#bigquery">BigQuery</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#bigquery-operators">BigQuery Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#bigqueryhook">BigQueryHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-sql">Cloud SQL</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-sql-operators">Cloud SQL Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-sql-hook">Cloud SQL Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#compute-engine">Compute Engine</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#compute-engine-operators">Compute Engine Operators</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-functions">Cloud Functions</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-functions-operators">Cloud Functions Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-functions-hook">Cloud Functions Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-dataflow">Cloud DataFlow</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#dataflow-operators">DataFlow Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#dataflowhook">DataFlowHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-dataproc">Cloud DataProc</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#dataproc-operators">DataProc Operators</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-datastore">Cloud Datastore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#datastore-operators">Datastore Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#datastorehook">DatastoreHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-ml-engine">Cloud ML Engine</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-operators">Cloud ML Engine Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-hook">Cloud ML Engine Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-storage">Cloud Storage</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#storage-operators">Storage Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#googlecloudstoragehook">GoogleCloudStorageHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#google-kubernetes-engine">Google Kubernetes Engine</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-cluster-operators">Google Kubernetes Engine Cluster Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-hook">Google Kubernetes Engine Hook</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="code.html">API Reference</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li>Integration</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/integration.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="integration">
<h1>Integration<a class="headerlink" href="#integration" title="Permalink to this headline"></a></h1>
<ul class="simple">
<li><a class="reference internal" href="#reverseproxy"><span class="std std-ref">Reverse Proxy</span></a></li>
<li><a class="reference internal" href="#azure"><span class="std std-ref">Azure: Microsoft Azure</span></a></li>
<li><a class="reference internal" href="#aws"><span class="std std-ref">AWS: Amazon Web Services</span></a></li>
<li><a class="reference internal" href="#databricks"><span class="std std-ref">Databricks</span></a></li>
<li><a class="reference internal" href="#gcp"><span class="std std-ref">GCP: Google Cloud Platform</span></a></li>
</ul>
<div class="section" id="reverse-proxy">
<span id="reverseproxy"></span><h2>Reverse Proxy<a class="headerlink" href="#reverse-proxy" title="Permalink to this headline"></a></h2>
<p>Airflow can be set up behind a reverse proxy, with the ability to set its endpoint with great
flexibility.</p>
<p>For example, you can configure your reverse proxy to serve Airflow at:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">lab</span><span class="o">.</span><span class="n">mycompany</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span><span class="o">/</span>
</pre></div>
</div>
<p>To do so, you need to set the following option in your <cite>airflow.cfg</cite>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">base_url</span> <span class="o">=</span> <span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="n">my_host</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span>
</pre></div>
</div>
<p>Additionally, if you use the Celery Executor, you can serve Flower at <cite>/myorg/flower</cite> with:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">flower_url_prefix</span> <span class="o">=</span> <span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">flower</span>
</pre></div>
</div>
<p>Your reverse proxy (e.g. nginx) should be configured as follows:</p>
<ul>
<li><p class="first">pass the url and http header as it for the Airflow webserver, without any rewrite, for example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server {
listen 80;
server_name lab.mycompany.com;
location /myorg/airflow/ {
proxy_pass http://localhost:8080;
proxy_set_header Host $host;
proxy_redirect off;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection &quot;upgrade&quot;;
}
}
</pre></div>
</div>
</li>
<li><p class="first">rewrite the url for the flower endpoint:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server {
listen 80;
server_name lab.mycompany.com;
location /myorg/flower/ {
rewrite ^/myorg/flower/(.*)$ /$1 break; # remove prefix from http header
proxy_pass http://localhost:5555;
proxy_set_header Host $host;
proxy_redirect off;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection &quot;upgrade&quot;;
}
}
</pre></div>
</div>
</li>
</ul>
<p>To ensure that Airflow generates URLs with the correct scheme when
running behind a TLS-terminating proxy, you should configure the proxy
to set the <cite>X-Forwarded-Proto</cite> header, and enable the <cite>ProxyFix</cite>
middleware in your <cite>airflow.cfg</cite>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">enable_proxy_fix</span> <span class="o">=</span> <span class="kc">True</span>
</pre></div>
</div>
<p>Note: you should only enable the <cite>ProxyFix</cite> middleware when running
Airflow behind a trusted proxy (AWS ELB, nginx, etc.).</p>
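<p>Putting it together, a minimal <cite>[webserver]</cite> section for the example above could look like the
following (the hostname is illustrative); also make sure your proxy forwards the scheme, e.g. with
<cite>proxy_set_header X-Forwarded-Proto $scheme;</cite> in the nginx locations shown above:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>[webserver]
base_url = https://lab.mycompany.com/myorg/airflow
enable_proxy_fix = True
</pre></div>
</div>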
</div>
<div class="section" id="azure-microsoft-azure">
<span id="azure"></span><h2>Azure: Microsoft Azure<a class="headerlink" href="#azure-microsoft-azure" title="Permalink to this headline"></a></h2>
<p>Airflow has limited support for Microsoft Azure: interfaces exist only for Azure Blob
Storage and Azure Data Lake. The hook, sensor and operator for Blob Storage and the
Azure Data Lake hook live in the contrib section.</p>
<div class="section" id="azure-blob-storage">
<h3>Azure Blob Storage<a class="headerlink" href="#azure-blob-storage" title="Permalink to this headline"></a></h3>
<p>All classes communicate via the Windows Azure Storage Blob (WASB) protocol. Make sure that an
Airflow connection of type <cite>wasb</cite> exists. Authorization can be done by supplying a
login (=Storage account name) and password (=KEY), or a login and SAS token in the extra
field (see connection <cite>wasb_default</cite> for an example). A short usage sketch follows the list below.</p>
<ul class="simple">
<li><a class="reference internal" href="#wasbblobsensor"><span class="std std-ref">WasbBlobSensor</span></a>: Checks if a blob is present on Azure Blob storage.</li>
<li><a class="reference internal" href="#wasbprefixsensor"><span class="std std-ref">WasbPrefixSensor</span></a>: Checks if blobs matching a prefix are present on Azure Blob storage.</li>
<li><a class="reference internal" href="#filetowasboperator"><span class="std std-ref">FileToWasbOperator</span></a>: Uploads a local file to a container as a blob.</li>
<li><a class="reference internal" href="#wasbhook"><span class="std std-ref">WasbHook</span></a>: Interface with Azure Blob Storage.</li>
</ul>
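<p>As a minimal sketch (not part of the official examples), the sensor and operator above could be wired
into a DAG like this; the connection id <cite>wasb_default</cite> is assumed to exist and the container, blob
and file names are illustrative:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.file_to_wasb import FileToWasbOperator
from airflow.contrib.sensors.wasb_sensor import WasbBlobSensor

with DAG(dag_id='wasb_example', start_date=datetime(2018, 1, 1),
         schedule_interval=None) as dag:

    # wait until the (illustrative) input blob shows up in the container
    wait_for_input = WasbBlobSensor(
        task_id='wait_for_input',
        container_name='my-container',
        blob_name='input/data.csv',
        wasb_conn_id='wasb_default',
    )

    # upload a local file as a blob once the input has arrived
    upload_report = FileToWasbOperator(
        task_id='upload_report',
        file_path='/tmp/report.csv',
        container_name='my-container',
        blob_name='output/report.csv',
        wasb_conn_id='wasb_default',
    )

    wait_for_input &gt;&gt; upload_report
</pre></div>
</div>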
<div class="section" id="wasbblobsensor">
<span id="id1"></span><h4>WasbBlobSensor<a class="headerlink" href="#wasbblobsensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbBlobSensor</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>wasb_conn_id='wasb_default'</em>, <em>check_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Waits for a blob to arrive on Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li>
<li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that
<cite>WasbHook.check_for_blob()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that the sensors defined while deriving this class should
override.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="wasbprefixsensor">
<span id="id2"></span><h4>WasbPrefixSensor<a class="headerlink" href="#wasbprefixsensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbPrefixSensor</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>wasb_conn_id='wasb_default'</em>, <em>check_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Waits for blobs matching a prefix to arrive on Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li>
<li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li>
<li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that
<cite>WasbHook.check_for_prefix()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that the sensors defined while deriving this class should
override.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="filetowasboperator">
<span id="id3"></span><h4>FileToWasbOperator<a class="headerlink" href="#filetowasboperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_wasb.</code><code class="descname">FileToWasbOperator</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>wasb_conn_id='wasb_default'</em>, <em>load_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Uploads a file to Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to load. (templated)</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container. (templated)</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob. (templated)</li>
<li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li>
<li><strong>load_options</strong> (<em>dict</em>) – Optional keyword arguments that
<cite>WasbHook.load_file()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure Blob Storage.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="wasbhook">
<span id="id4"></span><h4>WasbHook<a class="headerlink" href="#wasbhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.wasb_hook.</code><code class="descname">WasbHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure Blob Storage through the wasb:// protocol.</p>
<p>Additional options passed in the ‘extra’ field of the connection will be
passed to the <cite>BlockBlobService()</cite> constructor. For example, authenticate
using a SAS token by adding {“sas_token”: “YOUR_TOKEN”}.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob">
<code class="descname">check_for_blob</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_blob"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob" title="Permalink to this definition"></a></dt>
<dd><p>Check if a blob exists on Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.exists()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if the blob exists, False otherwise.</p>
</td>
</tr>
</tbody>
</table>
<p>Return type: bool</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix">
<code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix" title="Permalink to this definition"></a></dt>
<dd><p>Check if a prefix exists on Azure Blob storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.list_blobs()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if blobs matching the prefix exist, False otherwise.</p>
</td>
</tr>
</tbody>
</table>
<p>Return type: bool</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.delete_file">
<code class="descname">delete_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>is_prefix=False</em>, <em>ignore_if_missing=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.delete_file" title="Permalink to this definition"></a></dt>
<dd><p>Delete a file from Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>is_prefix</strong> (<em>bool</em>) – If blob_name is a prefix, delete all matching files</li>
<li><strong>ignore_if_missing</strong> (<em>bool</em>) – if True, then return success even if the
blob does not exist.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.delete_blob()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return the BlockBlobService object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_file">
<code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_file" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to download.</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.get_blob_to_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_file">
<code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_string">
<code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_string" title="Permalink to this definition"></a></dt>
<dd><p>Upload a string to Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>string_data</strong> (<em>str</em>) – String to load.</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_text()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.read_file">
<code class="descname">read_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.read_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.read_file" title="Permalink to this definition"></a></dt>
<dd><p>Read a file from Azure Blob Storage and return as a string.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.get_blob_to_text()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
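<p>A minimal usage sketch of the hook itself, assuming the <cite>wasb_default</cite> connection is configured and
using illustrative container and blob names:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.wasb_hook import WasbHook

hook = WasbHook(wasb_conn_id='wasb_default')

# upload a local file unless the blob already exists
if not hook.check_for_blob('my-container', 'output/report.csv'):
    hook.load_file('/tmp/report.csv', 'my-container', 'output/report.csv')

# read the blob back into a string
content = hook.read_file('my-container', 'output/report.csv')
</pre></div>
</div>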
</div>
</div>
<div class="section" id="azure-file-share">
<h3>Azure File Share<a class="headerlink" href="#azure-file-share" title="Permalink to this headline"></a></h3>
<p>Cloud variant of an SMB file share. Make sure that an Airflow connection of
type <cite>wasb</cite> exists. Authorization can be done by supplying a login (=Storage account name)
and password (=Storage account key), or a login and SAS token in the extra field
(see connection <cite>wasb_default</cite> for an example).</p>
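<p>A minimal usage sketch of the hook documented below, assuming a <cite>wasb_default</cite> connection configured
for the storage account; the share, directory and file names are illustrative:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.azure_fileshare_hook import AzureFileShareHook

hook = AzureFileShareHook(wasb_conn_id='wasb_default')

# create a directory on the share and upload a local file into it
hook.create_directory('my-share', 'reports')
hook.load_file('/tmp/report.csv', 'my-share', 'reports', 'report.csv')

# list what is stored under the directory
for entry in hook.list_directories_and_files('my-share', 'reports'):
    print(entry)
</pre></div>
</div>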
<div class="section" id="azurefilesharehook">
<h4>AzureFileShareHook<a class="headerlink" href="#azurefilesharehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_fileshare_hook.</code><code class="descname">AzureFileShareHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure FileShare Storage.</p>
<p>Additional options passed in the ‘extra’ field of the connection will be
passed to the <cite>FileService()</cite> constructor.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory">
<code class="descname">check_for_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory" title="Permalink to this definition"></a></dt>
<dd><p>Check if a directory exists on Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.exists()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if the file exists, False otherwise.</p>
</td>
</tr>
</tbody>
</table>
<p>Return type: bool</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file">
<code class="descname">check_for_file</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file" title="Permalink to this definition"></a></dt>
<dd><p>Check if a file exists on Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.exists()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if the file exists, False otherwise.</p>
</td>
</tr>
</tbody>
</table>
<p>Return type: bool</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory">
<code class="descname">create_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory" title="Permalink to this definition"></a></dt>
<dd><p>Create a new directory on an Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_directory()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A list of files and directories</p>
</td>
</tr>
</tbody>
</table>
<p>:rtype list</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return the FileService object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file">
<code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Where to store the file.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.get_file_to_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream">
<code class="descname">get_file_to_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file_to_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>stream</strong> (<em>file-like object</em>) – A filehandle to store the file to.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.get_file_to_stream()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files">
<code class="descname">list_directories_and_files</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.list_directories_and_files"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files" title="Permalink to this definition"></a></dt>
<dd><p>Return the list of directories and files stored on an Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.list_directories_and_files()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A list of files and directories</p>
</td>
</tr>
</tbody>
</table>
<p>Return type: list</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file">
<code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_file_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream">
<code class="descname">load_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>count</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream" title="Permalink to this definition"></a></dt>
<dd><p>Upload a stream to Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>stream</strong> (<em>file-like</em>) – Opened file/stream to upload as the file content.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>count</strong> (<em>int</em>) – Size of the stream in bytes</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_file_from_stream()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string">
<code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string" title="Permalink to this definition"></a></dt>
<dd><p>Upload a string to Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>string_data</strong> (<em>str</em>) – String to load.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_file_from_text()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="logging">
<h3>Logging<a class="headerlink" href="#logging" title="Permalink to this headline"></a></h3>
<p>Airflow can be configured to read and write task logs in Azure Blob Storage.
See <a class="reference internal" href="howto/write-logs.html#write-logs-azure"><span class="std std-ref">Writing Logs to Azure Blob Storage</span></a>.</p>
</div>
<div class="section" id="azure-data-lake">
<h3>Azure Data Lake<a class="headerlink" href="#azure-data-lake" title="Permalink to this headline"></a></h3>
<p>AzureDataLakeHook communicates via a REST API compatible with WebHDFS. Make sure that an
Airflow connection of type <cite>azure_data_lake</cite> exists. Authorization can be done by supplying a
login (=Client ID), password (=Client Secret) and extra fields tenant (Tenant) and account_name (Account Name)
(see connection <cite>azure_data_lake_default</cite> for an example). A short usage sketch follows the list below.</p>
<ul class="simple">
<li><a class="reference internal" href="#azuredatalakehook"><span class="std std-ref">AzureDataLakeHook</span></a>: Interface with Azure Data Lake.</li>
</ul>
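<p>A minimal usage sketch, assuming the <cite>azure_data_lake_default</cite> connection is configured with tenant and
account_name in the extra field; the remote and local paths are illustrative:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook

hook = AzureDataLakeHook(azure_data_lake_conn_id='azure_data_lake_default')

# download the input if it exists, then upload a processed result
if hook.check_for_file('raw/input.csv'):
    hook.download_file(local_path='/tmp/input.csv', remote_path='raw/input.csv')

hook.upload_file(local_path='/tmp/output.csv', remote_path='processed/output.csv')
</pre></div>
</div>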
<div class="section" id="azuredatalakehook">
<span id="id5"></span><h4>AzureDataLakeHook<a class="headerlink" href="#azuredatalakehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_data_lake_hook.</code><code class="descname">AzureDataLakeHook</code><span class="sig-paren">(</span><em>azure_data_lake_conn_id='azure_data_lake_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure Data Lake.</p>
<p>Client ID and client secret should be in user and password parameters.
Tenant and account name should be extra field as
{“tenant”: “&lt;TENANT&gt;”, “account_name”: “ACCOUNT_NAME”}.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – Reference to the Azure Data Lake connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file">
<code class="descname">check_for_file</code><span class="sig-paren">(</span><em>file_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file" title="Permalink to this definition"></a></dt>
<dd><p>Check if a file exists on Azure Data Lake.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>file_path</strong> (<em>str</em>) – Path and name of the file.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if the file exists, False otherwise.</td>
</tr>
</tbody>
</table>
<p>Return type: bool</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file">
<code class="descname">download_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.download_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure Data Lake.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>local_path</strong> (<em>str</em>) – local path. If downloading a single file, will write to this
specific file, unless it is an existing directory, in which case a file is
created within it. If downloading multiple files, this is the root
directory to write within. Will create directories as required.</li>
<li><strong>remote_path</strong> (<em>str</em>) – remote path/globstring to use to find remote files.
Recursive glob patterns using <cite>**</cite> are not supported.</li>
<li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li>
<li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories.
If False and remote path is a directory, will quit regardless if any files
would be overwritten or not. If True, only matching filenames are actually
overwritten.</li>
<li><strong>buffersize</strong> (<em>int</em>) – Number of bytes for the internal buffer (default 2**22).
This cannot be bigger than a chunk and cannot be smaller than a block.</li>
<li><strong>blocksize</strong> (<em>int</em>) – Number of bytes for a block (default 2**22). Within each chunk,
a smaller block is written for each API call. This block cannot be bigger than a chunk.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return an AzureDLFileSystem object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file">
<code class="descname">upload_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.upload_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure Data Lake.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>local_path</strong> (<em>str</em>) – local path. Can be single file, directory (in which case,
upload recursively) or glob pattern. Recursive glob patterns using <cite>**</cite>
are not supported.</li>
<li><strong>remote_path</strong> (<em>str</em>) – Remote path to upload to; if multiple files, this is the
directory root to write within.</li>
<li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li>
<li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories.
If False and remote path is a directory, will quit regardless if any files
would be overwritten or not. If True, only matching filenames are actually
overwritten.</li>
<li><strong>buffersize</strong> (<em>int</em>) – Number of bytes for the internal buffer (default 2**22).
This cannot be bigger than a chunk and cannot be smaller than a block.</li>
<li><strong>blocksize</strong> (<em>int</em>) – Number of bytes for a block (default 2**22). Within each chunk,
a smaller block is written for each API call. This block cannot be bigger than a chunk.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<div class="section" id="aws-amazon-web-services">
<span id="aws"></span><h2>AWS: Amazon Web Services<a class="headerlink" href="#aws-amazon-web-services" title="Permalink to this headline"></a></h2>
<p>Airflow has extensive support for Amazon Web Services. Note, however, that the Hooks, Sensors and
Operators are in the contrib section.</p>
<div class="section" id="aws-emr">
<h3>AWS EMR<a class="headerlink" href="#aws-emr" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#emraddstepsoperator"><span class="std std-ref">EmrAddStepsOperator</span></a> : Adds steps to an existing EMR JobFlow.</li>
<li><a class="reference internal" href="#emrcreatejobflowoperator"><span class="std std-ref">EmrCreateJobFlowOperator</span></a> : Creates an EMR JobFlow, reading the config from the EMR connection.</li>
<li><a class="reference internal" href="#emrterminatejobflowoperator"><span class="std std-ref">EmrTerminateJobFlowOperator</span></a> : Terminates an EMR JobFlow.</li>
<li><a class="reference internal" href="#emrhook"><span class="std std-ref">EmrHook</span></a> : Interact with AWS EMR.</li>
</ul>
<div class="section" id="emraddstepsoperator">
<span id="id6"></span><h4>EmrAddStepsOperator<a class="headerlink" href="#emraddstepsoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_add_steps_operator.</code><code class="descname">EmrAddStepsOperator</code><span class="sig-paren">(</span><em>job_flow_id</em>, <em>aws_conn_id='s3_default'</em>, <em>steps=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>An operator that adds steps to an existing EMR job_flow.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>job_flow_id</strong> – id of the JobFlow to add steps to. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
<li><strong>steps</strong> (<em>list</em>) – boto3 style steps to be added to the jobflow. (templated)</li>
</ul>
</td>
</tr>
</tbody>
</table>
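<p>A minimal sketch of wiring the operator into a DAG, assuming a job flow is already running; the job flow id, step name and arguments are hypothetical, and the step dictionary follows the boto3 format:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator

dag = DAG('emr_add_steps_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

# boto3-style step definition (names and arguments are illustrative)
SPARK_STEP = {
    'Name': 'calculate_pi',
    'ActionOnFailure': 'CONTINUE',
    'HadoopJarStep': {
        'Jar': 'command-runner.jar',
        'Args': ['spark-example', 'SparkPi', '10'],
    },
}

add_steps = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id='j-EXAMPLE12345',  # id of the existing JobFlow (templated)
    aws_conn_id='aws_default',
    steps=[SPARK_STEP],
    dag=dag)
</pre></div>
</div>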
<dl class="method">
<dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="emrcreatejobflowoperator">
<span id="id7"></span><h4>EmrCreateJobFlowOperator<a class="headerlink" href="#emrcreatejobflowoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_create_job_flow_operator.</code><code class="descname">EmrCreateJobFlowOperator</code><span class="sig-paren">(</span><em>aws_conn_id='s3_default'</em>, <em>emr_conn_id='emr_default'</em>, <em>job_flow_overrides=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates an EMR JobFlow, reading the config from the EMR connection.
A dictionary of JobFlow overrides can be passed that override
the config from the connection.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
<li><strong>emr_conn_id</strong> (<em>str</em>) – emr connection to use</li>
<li><strong>job_flow_overrides</strong> – boto3 style arguments to override
emr_connection extra. (templated)</li>
</ul>
</td>
</tr>
</tbody>
</table>
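<p>A minimal sketch, assuming an <cite>emr_default</cite> connection whose extra field holds the base job flow config; the override values below are purely illustrative and follow the boto3 <cite>run_job_flow</cite> argument names:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.emr_create_job_flow_operator import EmrCreateJobFlowOperator

dag = DAG('emr_create_job_flow_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

# Merged over the config stored in the EMR connection extra (illustrative values)
JOB_FLOW_OVERRIDES = {
    'Name': 'airflow-cluster',
    'Instances': {'InstanceCount': 3},
}

create_job_flow = EmrCreateJobFlowOperator(
    task_id='create_job_flow',
    aws_conn_id='aws_default',
    emr_conn_id='emr_default',
    job_flow_overrides=JOB_FLOW_OVERRIDES,
    dag=dag)
</pre></div>
</div>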
<dl class="method">
<dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="emrterminatejobflowoperator">
<span id="id8"></span><h4>EmrTerminateJobFlowOperator<a class="headerlink" href="#emrterminatejobflowoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_terminate_job_flow_operator.</code><code class="descname">EmrTerminateJobFlowOperator</code><span class="sig-paren">(</span><em>job_flow_id</em>, <em>aws_conn_id='s3_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator to terminate EMR JobFlows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>job_flow_id</strong> – id of the JobFlow to terminate. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
</ul>
</td>
</tr>
</tbody>
</table>
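<p>Since <cite>job_flow_id</cite> is templated, a common pattern is to pull it from the XCom pushed by an upstream create task. A minimal sketch, assuming a task named <cite>create_job_flow</cite> exists in the same DAG:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.emr_terminate_job_flow_operator import EmrTerminateJobFlowOperator

dag = DAG('emr_terminate_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

terminate_job_flow = EmrTerminateJobFlowOperator(
    task_id='terminate_job_flow',
    # Pull the job flow id from the upstream 'create_job_flow' task's XCom
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag)
</pre></div>
</div>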
<dl class="method">
<dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="emrhook">
<span id="id9"></span><h4>EmrHook<a class="headerlink" href="#emrhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.emr_hook.EmrHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.emr_hook.</code><code class="descname">EmrHook</code><span class="sig-paren">(</span><em>emr_conn_id=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with AWS EMR. emr_conn_id is only necessary for using the
create_job_flow method.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow">
<code class="descname">create_job_flow</code><span class="sig-paren">(</span><em>job_flow_overrides</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook.create_job_flow"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow" title="Permalink to this definition"></a></dt>
<dd><p>Creates a job flow using the config from the EMR connection.
Keys of the json extra hash may be any of the arguments to the boto3
run_job_flow method.
Overrides for this config may be passed as the job_flow_overrides argument.</p>
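<p>A minimal sketch of calling the hook directly; the override values are illustrative and the <cite>JobFlowId</cite> field is part of the boto3 <cite>run_job_flow</cite> response:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.emr_hook import EmrHook

hook = EmrHook(emr_conn_id='emr_default', aws_conn_id='aws_default')

# Keys follow the boto3 run_job_flow arguments; values are illustrative
response = hook.create_job_flow({'Name': 'ad-hoc-cluster',
                                 'Instances': {'InstanceCount': 1}})
job_flow_id = response['JobFlowId']
</pre></div>
</div>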
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="aws-s3">
<h3>AWS S3<a class="headerlink" href="#aws-s3" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#s3hook"><span class="std std-ref">S3Hook</span></a> : Interact with AWS S3.</li>
<li><a class="reference internal" href="#s3filetransformoperator"><span class="std std-ref">S3FileTransformOperator</span></a> : Copies data from a source S3 location to a temporary location on the local filesystem.</li>
<li><a class="reference internal" href="#s3listoperator"><span class="std std-ref">S3ListOperator</span></a> : Lists the files matching a key prefix from a S3 location.</li>
<li><a class="reference internal" href="#s3togooglecloudstorageoperator"><span class="std std-ref">S3ToGoogleCloudStorageOperator</span></a> : Syncs an S3 location with a Google Cloud Storage bucket.</li>
<li><a class="reference internal" href="#s3tohivetransfer"><span class="std std-ref">S3ToHiveTransfer</span></a> : Moves data from S3 to Hive. The operator downloads a file from S3, stores the file locally before loading it into a Hive table.</li>
</ul>
<div class="section" id="s3hook">
<span id="id10"></span><h4>S3Hook<a class="headerlink" href="#s3hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.hooks.S3_hook.S3Hook">
<em class="property">class </em><code class="descclassname">airflow.hooks.S3_hook.</code><code class="descname">S3Hook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with AWS S3, using the boto3 library.</p>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_bucket">
<code class="descname">check_for_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Check if bucket_name exists.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_key">
<code class="descname">check_for_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_key" title="Permalink to this definition"></a></dt>
<dd><p>Checks if a key exists in a bucket</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_prefix">
<code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix</em>, <em>delimiter</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_prefix" title="Permalink to this definition"></a></dt>
<dd><p>Checks that a prefix exists in a bucket</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key">
<code class="descname">check_for_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key" title="Permalink to this definition"></a></dt>
<dd><p>Checks that a key matching a wildcard expression exists in a bucket</p>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.copy_object">
<code class="descname">copy_object</code><span class="sig-paren">(</span><em>source_bucket_key</em>, <em>dest_bucket_key</em>, <em>source_bucket_name=None</em>, <em>dest_bucket_name=None</em>, <em>source_version_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.copy_object"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.copy_object" title="Permalink to this definition"></a></dt>
<dd><p>Creates a copy of an object that is already stored in S3.</p>
<p>Note: the S3 connection used here needs to have access to both
source and destination bucket/key.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket_key</strong> (<em>str</em>) – <p>The key of the source object.</p>
<p>It can be either full s3:// style url or relative path from root level.</p>
<p>When it’s specified as a full s3:// url, please omit source_bucket_name.</p>
</li>
<li><strong>dest_bucket_key</strong> (<em>str</em>) – <p>The key of the object to copy to.</p>
<p>The convention to specify <cite>dest_bucket_key</cite> is the same
as <cite>source_bucket_key</cite>.</p>
</li>
<li><strong>source_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket where the source object is in.</p>
<p>It should be omitted when <cite>source_bucket_key</cite> is provided as a full s3:// url.</p>
</li>
<li><strong>dest_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket to where the object is copied.</p>
<p>It should be omitted when <cite>dest_bucket_key</cite> is provided as a full s3:// url.</p>
</li>
<li><strong>source_version_id</strong> (<em>str</em>) – Version ID of the source object (OPTIONAL)</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.create_bucket">
<code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.create_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Creates an Amazon S3 bucket.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – The name of the bucket</li>
<li><strong>region_name</strong> (<em>str</em>) – The name of the aws region in which to create the bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.delete_objects">
<code class="descname">delete_objects</code><span class="sig-paren">(</span><em>bucket</em>, <em>keys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.delete_objects"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.delete_objects" title="Permalink to this definition"></a></dt>
<dd><table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>str</em>) – Name of the bucket in which you are going to delete object(s)</li>
<li><strong>keys</strong> (<em>str</em><em> or </em><em>list</em>) – <p>The key(s) to delete from S3 bucket.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a string, it’s supposed to be the key name of
the single object to delete.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a list, it’s supposed to be the list of the
keys to delete.</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.get_bucket">
<code class="descname">get_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Returns a boto3.S3.Bucket object</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.get_key">
<code class="descname">get_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_key" title="Permalink to this definition"></a></dt>
<dd><p>Returns a boto3.s3.Object</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>str</em>) – the path to the key</li>
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.get_wildcard_key">
<code class="descname">get_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_wildcard_key" title="Permalink to this definition"></a></dt>
<dd><p>Returns a boto3.s3.Object object matching the wildcard expression</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li>
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.list_keys">
<code class="descname">list_keys</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_keys" title="Permalink to this definition"></a></dt>
<dd><p>Lists keys in a bucket under prefix and not containing delimiter</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>prefix</strong> (<em>str</em>) – a key prefix</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li>
<li><strong>page_size</strong> (<em>int</em>) – pagination size</li>
<li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.list_prefixes">
<code class="descname">list_prefixes</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_prefixes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_prefixes" title="Permalink to this definition"></a></dt>
<dd><p>Lists prefixes in a bucket under prefix</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>prefix</strong> (<em>str</em>) – a key prefix</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li>
<li><strong>page_size</strong> (<em>int</em>) – pagination size</li>
<li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_bytes">
<code class="descname">load_bytes</code><span class="sig-paren">(</span><em>bytes_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_bytes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_bytes" title="Permalink to this definition"></a></dt>
<dd><p>Loads bytes to S3</p>
<p>This is provided as a convenience to drop a string in S3. It uses the
boto infrastructure to ship a file to s3.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bytes_data</strong> (<em>bytes</em>) – bytes to set as content for the key.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key
if it already exists</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side
by S3 and will be stored in an encrypted form while at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_file">
<code class="descname">load_file</code><span class="sig-paren">(</span><em>filename</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Loads a local file to S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>filename</strong> (<em>str</em>) – name of the file to load.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key
if it already exists. If replace is False and the key exists, an
error will be raised.</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side
by S3 and will be stored in an encrypted form while at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_file_obj">
<code class="descname">load_file_obj</code><span class="sig-paren">(</span><em>file_obj</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file_obj"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file_obj" title="Permalink to this definition"></a></dt>
<dd><p>Loads a file object to S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_obj</strong> (<em>file-like object</em>) – The file-like object to set as the content for the S3 key.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag that indicates whether to overwrite the key
if it already exists.</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, S3 encrypts the file on the server,
and the file is stored in encrypted form at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_string">
<code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em>, <em>encoding='utf-8'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_string" title="Permalink to this definition"></a></dt>
<dd><p>Loads a string to S3</p>
<p>This is provided as a convenience to drop a string in S3. It uses the
boto infrastructure to ship a file to S3.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>string_data</strong> (<em>str</em>) – string to set as content for the key.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key
if it already exists</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side
by S3 and will be stored in an encrypted form while at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.read_key">
<code class="descname">read_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.read_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.read_key" title="Permalink to this definition"></a></dt>
<dd><p>Reads a key from S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.select_key">
<code class="descname">select_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em>, <em>expression='SELECT * FROM S3Object'</em>, <em>expression_type='SQL'</em>, <em>input_serialization={'CSV': {}}</em>, <em>output_serialization={'CSV': {}}</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.select_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.select_key" title="Permalink to this definition"></a></dt>
<dd><p>Reads a key with S3 Select.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li>
<li><strong>expression</strong> (<em>str</em>) – S3 Select expression</li>
<li><strong>expression_type</strong> (<em>str</em>) – S3 Select expression type</li>
<li><strong>input_serialization</strong> (<em>dict</em>) – S3 Select input data serialization format</li>
<li><strong>output_serialization</strong> (<em>dict</em>) – S3 Select output data serialization format</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">retrieved subset of original data by S3 Select</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str</p>
</td>
</tr>
</tbody>
</table>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more details about S3 Select parameters:
<a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content">http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content</a></p>
</div>
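<p>A minimal sketch of filtering a CSV object server-side; the bucket, key and column positions are hypothetical, and the serialization dictionaries follow the boto3 <cite>select_object_content</cite> format referenced above:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.hooks.S3_hook import S3Hook

hook = S3Hook(aws_conn_id='aws_default')

# Retrieve two columns of a headerless CSV without downloading the whole object
subset = hook.select_key(
    key='sales/2018/04/orders.csv',
    bucket_name='my-example-bucket',
    expression='SELECT s._1, s._3 FROM S3Object s',
    expression_type='SQL',
    input_serialization={'CSV': {'FileHeaderInfo': 'NONE'}},
    output_serialization={'CSV': {}})
</pre></div>
</div>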
</dd></dl>
</dd></dl>
</div>
<div class="section" id="s3filetransformoperator">
<span id="id11"></span><h4>S3FileTransformOperator<a class="headerlink" href="#s3filetransformoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator">
<em class="property">class </em><code class="descclassname">airflow.operators.s3_file_transform_operator.</code><code class="descname">S3FileTransformOperator</code><span class="sig-paren">(</span><em>source_s3_key</em>, <em>dest_s3_key</em>, <em>transform_script=None</em>, <em>select_expression=None</em>, <em>source_aws_conn_id='aws_default'</em>, <em>dest_aws_conn_id='aws_default'</em>, <em>replace=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies data from a source S3 location to a temporary location on the
local filesystem. Runs a transformation on this file as specified by
the transformation script and uploads the output to a destination S3
location.</p>
<p>The locations of the source and the destination files on the local
filesystem are provided as the first and second arguments to the
transformation script. The transformation script is expected to read the
data from the source, transform it, and write the output to the local
destination file. The operator then takes over control and uploads the
local destination file to S3.</p>
<p>S3 Select is also available to filter the source contents. Users can
omit the transformation script if an S3 Select expression is specified.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li>
<li><strong>source_aws_conn_id</strong> (<em>str</em>) – source s3 connection</li>
<li><strong>dest_s3_key</strong> (<em>str</em>) – The key to be written to S3. (templated)</li>
<li><strong>dest_aws_conn_id</strong> (<em>str</em>) – destination s3 connection</li>
<li><strong>replace</strong> (<em>bool</em>) – Replace dest S3 key if it already exists</li>
<li><strong>transform_script</strong> (<em>str</em>) – location of the executable transformation script</li>
<li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li>
</ul>
</td>
</tr>
</tbody>
</table>
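<p>A minimal sketch of the operator in a DAG; the bucket names, keys and the path of the transformation script are hypothetical (any executable that reads its first argument and writes its second will do):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.operators.s3_file_transform_operator import S3FileTransformOperator

dag = DAG('s3_transform_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

transform = S3FileTransformOperator(
    task_id='transform_file',
    source_s3_key='s3://my-source-bucket/raw/{{ ds }}/input.csv',
    dest_s3_key='s3://my-dest-bucket/clean/{{ ds }}/output.csv',
    transform_script='/usr/local/bin/clean_csv.py',  # hypothetical executable
    source_aws_conn_id='aws_default',
    dest_aws_conn_id='aws_default',
    replace=True,
    dag=dag)
</pre></div>
</div>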
<dl class="method">
<dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="s3listoperator">
<span id="id12"></span><h4>S3ListOperator<a class="headerlink" href="#s3listoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_list_operator.</code><code class="descname">S3ListOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>aws_conn_id='aws_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>List all objects from the bucket with the given string prefix in name.</p>
<p>This operator returns a python list with the names of objects which can be
used by <cite>xcom</cite> in the downstream task.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li>
<li><strong>prefix</strong> (<em>string</em>) – Prefix string to filter the objects whose names begin with
this prefix. (templated)</li>
<li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – The connection ID to use when connecting to S3 storage.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following operator would list all the files
(excluding subfolders) from the S3
<code class="docutils literal notranslate"><span class="pre">customers/2018/04/</span></code> key in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">s3_file</span> <span class="o">=</span> <span class="n">S3ListOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;list_3s_files&#39;</span><span class="p">,</span>
<span class="n">bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">prefix</span><span class="o">=</span><span class="s1">&#39;customers/2018/04/&#39;</span><span class="p">,</span>
<span class="n">delimiter</span><span class="o">=</span><span class="s1">&#39;/&#39;</span><span class="p">,</span>
<span class="n">aws_conn_id</span><span class="o">=</span><span class="s1">&#39;aws_customers_conn&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
<dl class="method">
<dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="s3togooglecloudstorageoperator">
<span id="id13"></span><h4>S3ToGoogleCloudStorageOperator<a class="headerlink" href="#s3togooglecloudstorageoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_to_gcs_operator.</code><code class="descname">S3ToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>aws_conn_id='aws_default'</em>, <em>dest_gcs_conn_id=None</em>, <em>dest_gcs=None</em>, <em>delegate_to=None</em>, <em>replace=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="airflow.contrib.operators.s3_list_operator.S3ListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.s3_list_operator.S3ListOperator</span></code></a></p>
<p>Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage
destination path.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li>
<li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose names begin with
this prefix. (templated)</li>
<li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – The source S3 connection</li>
<li><strong>dest_gcs_conn_id</strong> (<em>string</em>) – The destination connection ID to use
when connecting to Google Cloud Storage.</li>
<li><strong>dest_gcs</strong> (<em>string</em>) – The destination Google Cloud Storage bucket and prefix
where you want to store the files. (templated)</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>replace</strong> (<em>bool</em>) – Whether you want to replace existing destination files
or not.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>s3_to_gcs_op = S3ToGoogleCloudStorageOperator(
    task_id='s3_to_gcs_example',
    bucket='my-s3-bucket',
    prefix='data/customers-201804',
    dest_gcs_conn_id='google_cloud_default',
    dest_gcs='gs://my.gcs.bucket/some/customers/',
    replace=False,
    dag=my_dag)
</pre></div>
</div>
<p>Note that <code class="docutils literal notranslate"><span class="pre">bucket</span></code>, <code class="docutils literal notranslate"><span class="pre">prefix</span></code>, <code class="docutils literal notranslate"><span class="pre">delimiter</span></code> and <code class="docutils literal notranslate"><span class="pre">dest_gcs</span></code> are
templated, so you can use variables in them if you wish.</p>
<dl class="method">
<dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="s3tohivetransfer">
<span id="id14"></span><h4>S3ToHiveTransfer<a class="headerlink" href="#s3tohivetransfer" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer">
<em class="property">class </em><code class="descclassname">airflow.operators.s3_to_hive_operator.</code><code class="descname">S3ToHiveTransfer</code><span class="sig-paren">(</span><em>s3_key</em>, <em>field_dict</em>, <em>hive_table</em>, <em>delimiter='</em>, <em>'</em>, <em>create=True</em>, <em>recreate=False</em>, <em>partition=None</em>, <em>headers=False</em>, <em>check_headers=False</em>, <em>wildcard_match=False</em>, <em>aws_conn_id='aws_default'</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>input_compressed=False</em>, <em>tblproperties=None</em>, <em>select_expression=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Moves data from S3 to Hive. The operator downloads a file from S3,
stores the file locally before loading it into a Hive table.
If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>,
a <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
Hive data types are inferred from the cursor’s metadata from.</p>
<p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code>
which isn’t the most efficient serialization format. If a
large amount of data is loaded and/or if the table gets
queried considerably, you may want to use this operator only to
stage the data into a temporary table before loading it into its
final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li>
<li><strong>field_dict</strong> (<em>dict</em>) – A dictionary of the fields name in the file
as keys and their Hive types as values</li>
<li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a
specific database. (templated)</li>
<li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li>
<li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every
execution</li>
<li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns
and values. (templated)</li>
<li><strong>headers</strong> (<em>bool</em>) – whether the file contains column names on the first
line</li>
<li><strong>check_headers</strong> (<em>bool</em>) – whether the column names on the first line should be
checked against the keys of field_dict</li>
<li><strong>wildcard_match</strong> (<em>bool</em>) – whether the s3_key should be interpreted as a Unix
wildcard pattern</li>
<li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – source s3 connection</li>
<li><strong>hive_cli_conn_id</strong> (<em>str</em>) – destination hive connection</li>
<li><strong>input_compressed</strong> (<em>bool</em>) – Boolean to determine if file decompression is
required to process headers</li>
<li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li>
<li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li>
</ul>
</td>
</tr>
</tbody>
</table>
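<p>A minimal sketch of staging a CSV file from S3 into a Hive table; the S3 key, field names and target table are hypothetical:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.operators.s3_to_hive_operator import S3ToHiveTransfer

dag = DAG('s3_to_hive_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

s3_to_hive = S3ToHiveTransfer(
    task_id='load_orders',
    s3_key='s3://my-example-bucket/orders/{{ ds }}.csv',
    # field order should match the column order in the file
    field_dict={'order_id': 'BIGINT', 'amount': 'DOUBLE', 'country': 'STRING'},
    hive_table='staging.orders',
    delimiter=',',
    headers=True,
    check_headers=True,
    recreate=True,
    aws_conn_id='aws_default',
    hive_cli_conn_id='hive_cli_default',
    dag=dag)
</pre></div>
</div>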
<dl class="method">
<dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to override when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="aws-ec2-container-service">
<h3>AWS EC2 Container Service<a class="headerlink" href="#aws-ec2-container-service" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#ecsoperator"><span class="std std-ref">ECSOperator</span></a> : Execute a task on AWS EC2 Container Service.</li>
</ul>
<div class="section" id="ecsoperator">
<span id="id15"></span><h4>ECSOperator<a class="headerlink" href="#ecsoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.ecs_operator.ECSOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.ecs_operator.</code><code class="descname">ECSOperator</code><span class="sig-paren">(</span><em>task_definition</em>, <em>cluster</em>, <em>overrides</em>, <em>aws_conn_id=None</em>, <em>region_name=None</em>, <em>launch_type='EC2'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Execute a task on AWS EC2 Container Service</p>
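<p>For illustration, a minimal usage sketch; the task definition, cluster, and container names below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.ecs_operator import ECSOperator

# Hypothetical names; 'overrides' mirrors the boto3 run_task parameter of the same name.
hello_ecs = ECSOperator(
    task_id='run_hello_world',
    task_definition='hello-world-task',
    cluster='default',
    overrides={
        'containerOverrides': [
            {'name': 'hello-world', 'command': ['echo', 'hello']},
        ],
    },
    aws_conn_id='aws_default',
    region_name='us-east-1',
    launch_type='EC2',
    dag=dag,
)
</pre></div>
</div>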
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>task_definition</strong> (<em>str</em>) – the task definition name on EC2 Container Service</li>
<li><strong>cluster</strong> (<em>str</em>) – the cluster name on EC2 Container Service</li>
<li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive (templated):
<a class="reference external" href="http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task">http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task</a></li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None,
boto3’s default credential strategy will be used
(<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li>
<li><strong>region_name</strong> (<em>str</em>) – region name to use in the AWS Hook.
Overrides the region_name in the connection (if provided)</li>
<li><strong>launch_type</strong> (<em>str</em>) – the launch type on which to run your task (‘EC2’ or ‘FARGATE’)</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.ecs_operator.ECSOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to override when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.operators.ecs_operator.ECSOperator.on_kill">
<code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator.on_kill" title="Permalink to this definition"></a></dt>
<dd><p>Override this method to clean up subprocesses when a task instance
gets killed. Any use of the threading, subprocess or multiprocessing
module within an operator needs to be cleaned up or it will leave
ghost processes behind.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="aws-batch-service">
<h3>AWS Batch Service<a class="headerlink" href="#aws-batch-service" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#awsbatchoperator"><span class="std std-ref">AWSBatchOperator</span></a> : Execute a task on AWS Batch Service.</li>
</ul>
<div class="section" id="awsbatchoperator">
<span id="id16"></span><h4>AWSBatchOperator<a class="headerlink" href="#awsbatchoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.awsbatch_operator.</code><code class="descname">AWSBatchOperator</code><span class="sig-paren">(</span><em>job_name</em>, <em>job_definition</em>, <em>job_queue</em>, <em>overrides</em>, <em>max_retries=4200</em>, <em>aws_conn_id=None</em>, <em>region_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Execute a job on AWS Batch Service</p>
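<p>For illustration, a minimal usage sketch; the job, queue, and connection names below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.awsbatch_operator import AWSBatchOperator

# Hypothetical names; 'overrides' maps to containerOverrides in boto3's submit_job.
nightly_batch = AWSBatchOperator(
    task_id='submit_batch_job',
    job_name='nightly-aggregation',
    job_definition='aggregation-job-def',
    job_queue='default-queue',
    overrides={'command': ['python', 'aggregate.py', '--date', '{{ ds }}']},
    max_retries=4200,
    aws_conn_id='aws_default',
    region_name='us-east-1',
    dag=dag,
)
</pre></div>
</div>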
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>job_name</strong> (<em>str</em>) – the name for the job that will run on AWS Batch</li>
<li><strong>job_definition</strong> (<em>str</em>) – the job definition name on AWS Batch</li>
<li><strong>job_queue</strong> (<em>str</em>) – the queue name on AWS Batch</li>
<li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive on
containerOverrides (templated).
<a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job">http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job</a></li>
<li><strong>max_retries</strong> (<em>int</em>) – number of exponential-backoff retries used while polling for job
completion (a boto3 waiter is not yet available); 4200 corresponds to roughly 48 hours</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None,
boto3’s default credential strategy will be used
(<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li>
<li><strong>region_name</strong> (<em>str</em>) – region name to use in the AWS Hook.
Overrides the region_name in the connection (if provided)</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to override when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.on_kill">
<code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.on_kill" title="Permalink to this definition"></a></dt>
<dd><p>Override this method to clean up subprocesses when a task instance
gets killed. Any use of the threading, subprocess or multiprocessing
module within an operator needs to be cleaned up or it will leave
ghost processes behind.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="aws-redshift">
<h3>AWS RedShift<a class="headerlink" href="#aws-redshift" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#awsredshiftclustersensor"><span class="std std-ref">AwsRedshiftClusterSensor</span></a> : Waits for a Redshift cluster to reach a specific status.</li>
<li><a class="reference internal" href="#redshifthook"><span class="std std-ref">RedshiftHook</span></a> : Interact with AWS Redshift, using the boto3 library.</li>
<li><a class="reference internal" href="#redshifttos3transfer"><span class="std std-ref">RedshiftToS3Transfer</span></a> : Executes an unload command to S3 as CSV with or without headers.</li>
<li><a class="reference internal" href="#s3toredshifttransfer"><span class="std std-ref">S3ToRedshiftTransfer</span></a> : Executes an copy command from S3 as CSV with or without headers.</li>
</ul>
<div class="section" id="awsredshiftclustersensor">
<span id="id17"></span><h4>AwsRedshiftClusterSensor<a class="headerlink" href="#awsredshiftclustersensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_redshift_cluster_sensor.</code><code class="descname">AwsRedshiftClusterSensor</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>target_status='available'</em>, <em>aws_conn_id='aws_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Waits for a Redshift cluster to reach a specific status.</p>
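<p>For illustration, a minimal usage sketch; the cluster identifier below is hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.sensors.aws_redshift_cluster_sensor import AwsRedshiftClusterSensor

# Hypothetical cluster identifier; the sensor succeeds once the cluster reports 'available'.
wait_for_cluster = AwsRedshiftClusterSensor(
    task_id='wait_for_redshift_cluster',
    cluster_identifier='analytics-cluster',
    target_status='available',
    aws_conn_id='aws_default',
    poke_interval=60,
    timeout=60 * 60,
    dag=dag,
)
</pre></div>
</div>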
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_identifier</strong> (<em>str</em>) – The identifier for the cluster being pinged.</li>
<li><strong>target_status</strong> (<em>str</em>) – The cluster status desired.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that sensors derived from this class should
override.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="redshifthook">
<span id="id18"></span><h4>RedshiftHook<a class="headerlink" href="#redshifthook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.redshift_hook.</code><code class="descname">RedshiftHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with AWS Redshift, using the boto3 library</p>
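<p>For illustration, a minimal sketch of using the hook from Python code; the cluster and snapshot identifiers below are hypothetical:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.redshift_hook import RedshiftHook

# Hypothetical identifiers; create a snapshot only if the cluster is currently available.
hook = RedshiftHook(aws_conn_id='aws_default')
if hook.cluster_status('analytics-cluster') == 'available':
    hook.create_cluster_snapshot(
        snapshot_identifier='analytics-cluster-manual-snapshot',
        cluster_identifier='analytics-cluster',
    )
</pre></div>
</div>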
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status">
<code class="descname">cluster_status</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.cluster_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status" title="Permalink to this definition"></a></dt>
<dd><p>Return status of a cluster</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot">
<code class="descname">create_cluster_snapshot</code><span class="sig-paren">(</span><em>snapshot_identifier</em>, <em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.create_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot" title="Permalink to this definition"></a></dt>
<dd><p>Creates a snapshot of a cluster</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li>
<li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster">
<code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>skip_final_cluster_snapshot=True</em>, <em>final_cluster_snapshot_identifier=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster" title="Permalink to this definition"></a></dt>
<dd><p>Delete a cluster and optionally create a snapshot</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li>
<li><strong>skip_final_cluster_snapshot</strong> (<em>bool</em>) – determines cluster snapshot creation</li>
<li><strong>final_cluster_snapshot_identifier</strong> (<em>str</em>) – name of final cluster snapshot</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots">
<code class="descname">describe_cluster_snapshots</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.describe_cluster_snapshots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots" title="Permalink to this definition"></a></dt>
<dd><p>Gets a list of snapshots for a cluster</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot">
<code class="descname">restore_from_cluster_snapshot</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>snapshot_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.restore_from_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot" title="Permalink to this definition"></a></dt>
<dd><p>Restores a cluster from its snapshot</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li>
<li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="redshifttos3transfer">
<span id="id19"></span><h4>RedshiftToS3Transfer<a class="headerlink" href="#redshifttos3transfer" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer">
<em class="property">class </em><code class="descclassname">airflow.operators.redshift_to_s3_operator.</code><code class="descname">RedshiftToS3Transfer</code><span class="sig-paren">(</span><em>schema</em>, <em>table</em>, <em>s3_bucket</em>, <em>s3_key</em>, <em>redshift_conn_id='redshift_default'</em>, <em>aws_conn_id='aws_default'</em>, <em>unload_options=()</em>, <em>autocommit=False</em>, <em>parameters=None</em>, <em>include_header=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Executes an UNLOAD command to S3 as a CSV with headers</p>
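<p>For illustration, a minimal usage sketch; the schema, table, bucket, and key below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.operators.redshift_to_s3_operator import RedshiftToS3Transfer

# Hypothetical names; unload_options are appended to the generated UNLOAD statement.
unload_orders = RedshiftToS3Transfer(
    task_id='unload_orders_to_s3',
    schema='public',
    table='orders',
    s3_bucket='my-data-bucket',
    s3_key='exports/orders',
    redshift_conn_id='redshift_default',
    aws_conn_id='aws_default',
    unload_options=['ALLOWOVERWRITE'],
    include_header=True,
    dag=dag,
)
</pre></div>
</div>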
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li>
<li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li>
<li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li>
<li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li>
<li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li>
<li><strong>unload_options</strong> (<em>list</em>) – reference to a list of UNLOAD options</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to override when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="s3toredshifttransfer">
<span id="id20"></span><h4>S3ToRedshiftTransfer<a class="headerlink" href="#s3toredshifttransfer" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer">
<em class="property">class </em><code class="descclassname">airflow.operators.s3_to_redshift_operator.</code><code class="descname">S3ToRedshiftTransfer</code><span class="sig-paren">(</span><em>schema</em>, <em>table</em>, <em>s3_bucket</em>, <em>s3_key</em>, <em>redshift_conn_id='redshift_default'</em>, <em>aws_conn_id='aws_default'</em>, <em>copy_options=()</em>, <em>autocommit=False</em>, <em>parameters=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Executes a COPY command to load files from S3 to Redshift</p>
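<p>For illustration, a minimal usage sketch; the schema, table, bucket, and key below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.operators.s3_to_redshift_operator import S3ToRedshiftTransfer

# Hypothetical names; copy_options are appended to the generated COPY statement.
load_orders = S3ToRedshiftTransfer(
    task_id='load_orders_into_redshift',
    schema='public',
    table='orders',
    s3_bucket='my-data-bucket',
    s3_key='exports/orders',
    redshift_conn_id='redshift_default',
    aws_conn_id='aws_default',
    copy_options=['CSV', 'IGNOREHEADER 1'],
    dag=dag,
)
</pre></div>
</div>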
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li>
<li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li>
<li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li>
<li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li>
<li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li>
<li><strong>copy_options</strong> (<em>list</em>) – reference to a list of COPY options</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to override when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<div class="section" id="databricks">
<span id="id21"></span><h2>Databricks<a class="headerlink" href="#databricks" title="Permalink to this headline"></a></h2>
<p><a class="reference external" href="https://databricks.com/">Databricks</a> has contributed an Airflow operator which enables
submitting runs to the Databricks platform. Internally the operator talks to the
<code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">endpoint</a>.</p>
<div class="section" id="databrickssubmitrunoperator">
<h3>DatabricksSubmitRunOperator<a class="headerlink" href="#databrickssubmitrunoperator" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.databricks_operator.</code><code class="descname">DatabricksSubmitRunOperator</code><span class="sig-paren">(</span><em>json=None</em>, <em>spark_jar_task=None</em>, <em>notebook_task=None</em>, <em>new_cluster=None</em>, <em>existing_cluster_id=None</em>, <em>libraries=None</em>, <em>run_name=None</em>, <em>timeout_seconds=None</em>, <em>databricks_conn_id='databricks_default'</em>, <em>polling_period_seconds=30</em>, <em>databricks_retry_limit=3</em>, <em>databricks_retry_delay=1</em>, <em>do_xcom_push=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Submits a Spark job run to Databricks using the
<a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">api/2.0/jobs/runs/submit</a>
API endpoint.</p>
<p>There are two ways to instantiate this operator.</p>
<p>In the first way, you can take the JSON payload that you typically use
to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint and pass it directly
to our <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> through the <code class="docutils literal notranslate"><span class="pre">json</span></code> parameter.
For example</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">json</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;new_cluster&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;spark_version&#39;</span><span class="p">:</span> <span class="s1">&#39;2.1.0-db3-scala2.11&#39;</span><span class="p">,</span>
<span class="s1">&#39;num_workers&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="p">},</span>
<span class="s1">&#39;notebook_task&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;notebook_path&#39;</span><span class="p">:</span> <span class="s1">&#39;/Users/airflow@example.com/PrepareData&#39;</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">}</span>
<span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;notebook_run&#39;</span><span class="p">,</span> <span class="n">json</span><span class="o">=</span><span class="n">json</span><span class="p">)</span>
</pre></div>
</div>
<p>Another way to accomplish the same thing is to use the named parameters
of the <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> directly. Note that there is exactly
one named parameter for each top level parameter in the <code class="docutils literal notranslate"><span class="pre">runs/submit</span></code>
endpoint. In this method, your code would look like this:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">new_cluster</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;spark_version&#39;</span><span class="p">:</span> <span class="s1">&#39;2.1.0-db3-scala2.11&#39;</span><span class="p">,</span>
<span class="s1">&#39;num_workers&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="p">}</span>
<span class="n">notebook_task</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;notebook_path&#39;</span><span class="p">:</span> <span class="s1">&#39;/Users/airflow@example.com/PrepareData&#39;</span><span class="p">,</span>
<span class="p">}</span>
<span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;notebook_run&#39;</span><span class="p">,</span>
<span class="n">new_cluster</span><span class="o">=</span><span class="n">new_cluster</span><span class="p">,</span>
<span class="n">notebook_task</span><span class="o">=</span><span class="n">notebook_task</span><span class="p">)</span>
</pre></div>
</div>
<p>In the case where both the json parameter <strong>AND</strong> the named parameters
are provided, they will be merged together. If there are conflicts during the merge,
the named parameters will take precedence and override the top level <code class="docutils literal notranslate"><span class="pre">json</span></code> keys.</p>
<dl class="docutils">
<dt>Currently the named parameters that <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> supports are</dt>
<dd><ul class="first last simple">
<li><code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">notebook_task</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">new_cluster</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">libraries</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">run_name</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">timeout_seconds</span></code></li>
</ul>
</dd>
</dl>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>json</strong> (<em>dict</em>) – <p>A JSON object containing API parameters which will be passed
directly to the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint. The other named parameters
(i.e. <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code>, <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code>..) to this operator will
be merged with this json dictionary if they are provided.
If there are conflicts during the merge, the named parameters will
take precedence and override the top level json keys. (templated)</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more information about templating see <a class="reference internal" href="concepts.html#jinja-templating"><span class="std std-ref">Jinja Templating</span></a>.
<a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">https://docs.databricks.com/api/latest/jobs.html#runs-submit</a></p>
</div>
</li>
<li><strong>spark_jar_task</strong> (<em>dict</em>) – <p>The main class and parameters for the JAR task. Note that
the actual JAR is specified in the <code class="docutils literal notranslate"><span class="pre">libraries</span></code>.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask">https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask</a></p>
</div>
</li>
<li><strong>notebook_task</strong> (<em>dict</em>) – <p>The notebook path and parameters for the notebook task.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask">https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask</a></p>
</div>
</li>
<li><strong>new_cluster</strong> (<em>dict</em>) – <p>Specs for a new cluster on which this task will be run.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster">https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster</a></p>
</div>
</li>
<li><strong>existing_cluster_id</strong> (<em>string</em>) – ID for existing cluster on which to run this task.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified.
This field will be templated.</li>
<li><strong>libraries</strong> (<em>list of dicts</em>) – <p>Libraries which this run will use.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary">https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary</a></p>
</div>
</li>
<li><strong>run_name</strong> (<em>string</em>) – The run name used for this task.
By default this will be set to the Airflow <code class="docutils literal notranslate"><span class="pre">task_id</span></code>. This <code class="docutils literal notranslate"><span class="pre">task_id</span></code> is a
required parameter of the superclass <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code>.
This field will be templated.</li>
<li><strong>timeout_seconds</strong> (<em>int32</em>) – The timeout for this run. By default a value of 0 is used
which means to have no timeout.
This field will be templated.</li>
<li><strong>databricks_conn_id</strong> (<em>string</em>) – The name of the Airflow connection to use.
By default and in the common case this will be <code class="docutils literal notranslate"><span class="pre">databricks_default</span></code>. To use
token based authentication, provide the key <code class="docutils literal notranslate"><span class="pre">token</span></code> in the extra field for the
connection.</li>
<li><strong>polling_period_seconds</strong> (<em>int</em>) – Controls the rate which we poll for the result of
this run. By default the operator will poll every 30 seconds.</li>
<li><strong>databricks_retry_limit</strong> (<em>int</em>) – Number of times to retry if the Databricks backend is
unreachable. Its value must be greater than or equal to 1.</li>
<li><strong>databricks_retry_delay</strong> (<em>float</em>) – Number of seconds to wait between retries (it
might be a floating point number).</li>
<li><strong>do_xcom_push</strong> (<em>boolean</em>) – Whether we should push run_id and run_page_url to xcom.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to override when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.on_kill">
<code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.on_kill" title="Permalink to this definition"></a></dt>
<dd><p>Override this method to clean up subprocesses when a task instance
gets killed. Any use of the threading, subprocess or multiprocessing
module within an operator needs to be cleaned up or it will leave
ghost processes behind.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="gcp-google-cloud-platform">
<span id="gcp"></span><h2>GCP: Google Cloud Platform<a class="headerlink" href="#gcp-google-cloud-platform" title="Permalink to this headline"></a></h2>
<p>Airflow has extensive support for the Google Cloud Platform. Note, however, that most Hooks and
Operators are in the contrib section, which means they have a <em>beta</em> status and
can have breaking changes between minor releases.</p>
<p>See the <a class="reference internal" href="howto/manage-connections.html#connection-type-gcp"><span class="std std-ref">GCP connection type</span></a> documentation to
configure connections to GCP.</p>
<div class="section" id="id22">
<h3>Logging<a class="headerlink" href="#id22" title="Permalink to this headline"></a></h3>
<p>Airflow can be configured to read and write task logs in Google Cloud Storage.
See <a class="reference internal" href="howto/write-logs.html#write-logs-gcp"><span class="std std-ref">Writing Logs to Google Cloud Storage</span></a>.</p>
</div>
<div class="section" id="bigquery">
<h3>BigQuery<a class="headerlink" href="#bigquery" title="Permalink to this headline"></a></h3>
<div class="section" id="bigquery-operators">
<h4>BigQuery Operators<a class="headerlink" href="#bigquery-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#bigquerycheckoperator"><span class="std std-ref">BigQueryCheckOperator</span></a> : Performs checks against a SQL query that will return a single row with different values.</li>
<li><a class="reference internal" href="#bigqueryvaluecheckoperator"><span class="std std-ref">BigQueryValueCheckOperator</span></a> : Performs a simple value check using SQL code.</li>
<li><a class="reference internal" href="#bigqueryintervalcheckoperator"><span class="std std-ref">BigQueryIntervalCheckOperator</span></a> : Checks that the values of metrics given as SQL expressions are within a certain tolerance of the ones from days_back before.</li>
<li><a class="reference internal" href="#bigquerycreateemptytableoperator"><span class="std std-ref">BigQueryCreateEmptyTableOperator</span></a> : Creates a new, empty table in the specified BigQuery dataset optionally with schema.</li>
<li><a class="reference internal" href="#bigquerycreateexternaltableoperator"><span class="std std-ref">BigQueryCreateExternalTableOperator</span></a> : Creates a new, external table in the dataset with the data in Google Cloud Storage.</li>
<li><a class="reference internal" href="#bigquerydeletedatasetoperator"><span class="std std-ref">BigQueryDeleteDatasetOperator</span></a> : Deletes an existing BigQuery dataset.</li>
<li><a class="reference internal" href="#bigqueryoperator"><span class="std std-ref">BigQueryOperator</span></a> : Executes BigQuery SQL queries in a specific BigQuery database.</li>
<li><a class="reference internal" href="#bigquerytobigqueryoperator"><span class="std std-ref">BigQueryToBigQueryOperator</span></a> : Copy a BigQuery table to another BigQuery table.</li>
<li><a class="reference internal" href="#bigquerytocloudstorageoperator"><span class="std std-ref">BigQueryToCloudStorageOperator</span></a> : Transfers a BigQuery table to a Google Cloud Storage bucket</li>
</ul>
<div class="section" id="bigquerycheckoperator">
<span id="id23"></span><h5>BigQueryCheckOperator<a class="headerlink" href="#bigquerycheckoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryCheckOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>use_legacy_sql=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p>
<p>Performs checks against BigQuery. The <code class="docutils literal notranslate"><span class="pre">BigQueryCheckOperator</span></code> expects
a SQL query that will return a single row. Each value on that
first row is evaluated using Python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the
values evaluate to <code class="docutils literal notranslate"><span class="pre">False</span></code>, the check fails and errors out.</p>
<p>Note that Python bool casting evaluates the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">False</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">0</span></code></li>
<li>Empty string (<code class="docutils literal notranslate"><span class="pre">&quot;&quot;</span></code>)</li>
<li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li>
<li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li>
</ul>
<p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if
the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could,
for instance, check that the table has the same number of rows as
the source table upstream, or that the count of today’s partition is
greater than yesterday’s partition, or that a set of metrics is less
than 3 standard deviations from the 7-day average.</p>
<p>This operator can be used as a data quality check in your pipeline, and
depending on where you put it in your DAG, you can choose to
stop the critical path, preventing the publication of
dubious data, or place it on the side and receive email alerts
without stopping the progress of the DAG.</p>
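<p>For illustration, a minimal sketch of such a data quality check; the project, dataset, and table names below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_check_operator import BigQueryCheckOperator

# Hypothetical table; the task fails if the day's partition contains no rows.
check_events_not_empty = BigQueryCheckOperator(
    task_id='check_events_not_empty',
    sql="SELECT COUNT(*) FROM `my_project.my_dataset.events` WHERE ds = '{{ ds }}'",
    use_legacy_sql=False,
    bigquery_conn_id='bigquery_default',
    dag=dag,
)
</pre></div>
</div>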
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to the BigQuery database</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true)
or standard SQL (false).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="bigqueryvaluecheckoperator">
<span id="id24"></span><h5>BigQueryValueCheckOperator<a class="headerlink" href="#bigqueryvaluecheckoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryValueCheckOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>pass_value</em>, <em>tolerance=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>use_legacy_sql=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a></p>
<p>Performs a simple value check using SQL code.</p>
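<p>For illustration, a minimal sketch; the table name and expected value below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_check_operator import BigQueryValueCheckOperator

# Hypothetical check: the row count must equal 1000, within a 10% tolerance.
check_row_count = BigQueryValueCheckOperator(
    task_id='check_events_row_count',
    sql='SELECT COUNT(*) FROM `my_project.my_dataset.events`',
    pass_value=1000,
    tolerance=0.1,
    use_legacy_sql=False,
    bigquery_conn_id='bigquery_default',
    dag=dag,
)
</pre></div>
</div>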
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true)
or standard SQL (false).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="bigqueryintervalcheckoperator">
<span id="id25"></span><h5>BigQueryIntervalCheckOperator<a class="headerlink" href="#bigqueryintervalcheckoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryIntervalCheckOperator</code><span class="sig-paren">(</span><em>table</em>, <em>metrics_thresholds</em>, <em>date_filter_column='ds'</em>, <em>days_back=-7</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>use_legacy_sql=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.IntervalCheckOperator" title="airflow.operators.check_operator.IntervalCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.IntervalCheckOperator</span></code></a></p>
<p>Checks that the values of metrics given as SQL expressions are within
a certain tolerance of the ones from days_back before.</p>
<p>This method constructs a query like so</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SELECT</span> <span class="p">{</span><span class="n">metrics_threshold_dict_key</span><span class="p">}</span> <span class="n">FROM</span> <span class="p">{</span><span class="n">table</span><span class="p">}</span>
<span class="n">WHERE</span> <span class="p">{</span><span class="n">date_filter_column</span><span class="p">}</span><span class="o">=&lt;</span><span class="n">date</span><span class="o">&gt;</span>
</pre></div>
</div>
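<p>For illustration, a minimal sketch; the table and metric names below are hypothetical, and a <code class="docutils literal notranslate"><span class="pre">dag</span></code> object is assumed to exist:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_check_operator import BigQueryIntervalCheckOperator

# Hypothetical thresholds: today's values may differ from those 7 days back by at most 50%.
check_week_over_week = BigQueryIntervalCheckOperator(
    task_id='check_events_week_over_week',
    table='my_project.my_dataset.events',
    metrics_thresholds={'COUNT(*)': 1.5, 'SUM(amount)': 1.5},
    date_filter_column='ds',
    days_back=-7,
    use_legacy_sql=False,
    bigquery_conn_id='bigquery_default',
    dag=dag,
)
</pre></div>
</div>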
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> (<em>str</em>) – the table name</li>
<li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check
against. Defaults to 7 days</li>
<li><strong>metrics_thresholds</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics; for
example, ‘COUNT(*)’: 1.5 would require a 50 percent or less difference
between the current day and the prior days_back.</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true)
or standard SQL (false).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="bigquerygetdataoperator">
<span id="id26"></span><h5>BigQueryGetDataOperator<a class="headerlink" href="#bigquerygetdataoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_get_data.</code><code class="descname">BigQueryGetDataOperator</code><span class="sig-paren">(</span><em>dataset_id</em>, <em>table_id</em>, <em>max_results='100'</em>, <em>selected_fields=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Fetches the data from a BigQuery table (alternatively fetch data for selected columns)
and returns data in a python list. The number of elements in the returned list will
be equal to the number of rows fetched. Each element in the list will again be a list
where each element represents the column values for that row.</p>
<p><strong>Example Result</strong>: <code class="docutils literal notranslate"><span class="pre">[['Tony',</span> <span class="pre">'10'],</span> <span class="pre">['Mike',</span> <span class="pre">'20'],</span> <span class="pre">['Steve',</span> <span class="pre">'15']]</span></code></p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">If you pass fields to <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> which are in different order than the
order of columns already in
BQ table, the data will still be in the order of BQ table.
For example if the BQ table has 3 columns as
<code class="docutils literal notranslate"><span class="pre">[A,B,C]</span></code> and you pass ‘B,A’ in the <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code>
the data would still be of the form <code class="docutils literal notranslate"><span class="pre">'A,B'</span></code>.</p>
</div>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">get_data</span> <span class="o">=</span> <span class="n">BigQueryGetDataOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;get_data_from_bq&#39;</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="o">=</span><span class="s1">&#39;test_dataset&#39;</span><span class="p">,</span>
<span class="n">table_id</span><span class="o">=</span><span class="s1">&#39;Transaction_partitions&#39;</span><span class="p">,</span>
<span class="n">max_results</span><span class="o">=</span><span class="s1">&#39;100&#39;</span><span class="p">,</span>
<span class="n">selected_fields</span><span class="o">=</span><span class="s1">&#39;DATE&#39;</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>dataset_id</strong> – The dataset ID of the requested table. (templated)</li>
<li><strong>table_id</strong> (<em>string</em>) – The table ID of the requested table. (templated)</li>
<li><strong>max_results</strong> (<em>string</em>) – The maximum number of records (rows) to be fetched
from the table. (templated)</li>
<li><strong>selected_fields</strong> (<em>string</em>) – List of fields to return (comma-separated). If
unspecified, all fields are returned.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquerycreateemptytableoperator">
<span id="id27"></span><h5>BigQueryCreateEmptyTableOperator<a class="headerlink" href="#bigquerycreateemptytableoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyTableOperator</code><span class="sig-paren">(</span><em>dataset_id</em>, <em>table_id</em>, <em>project_id=None</em>, <em>schema_fields=None</em>, <em>gcs_schema_object=None</em>, <em>time_partitioning={}</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new, empty table in the specified BigQuery dataset,
optionally with schema.</p>
<p>The schema to be used for the BigQuery table may be specified in one of
two ways. You may either directly pass the schema fields in, or you may
point the operator to a Google cloud storage object name. The object in
Google cloud storage must be a JSON file with the schema fields in it.
You can also create a table without schema.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The project to create the table into. (templated)</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset to create the table into. (templated)</li>
<li><strong>table_id</strong> (<em>string</em>) – The name of the table to be created. (templated)</li>
<li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">}]</span>
</pre></div>
</div>
</li>
<li><strong>gcs_schema_object</strong> (<em>string</em>) – Full path to the JSON file containing
schema (templated). For
example: <code class="docutils literal notranslate"><span class="pre">gs://test-bucket/dir1/dir2/employee_schema.json</span></code></li>
<li><strong>time_partitioning</strong> (<em>dict</em>) – <p>configure optional time partitioning fields i.e.
partition by field, type and expiration as per API specifications.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning</a></p>
</div>
</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google
cloud storage hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to
work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>labels</strong> (<em>dict</em>) – <p>a dictionary containing labels for the table, passed to BigQuery</p>
<p><strong>Example (with schema JSON in GCS)</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;BigQueryCreateEmptyTableOperator_task&#39;</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="o">=</span><span class="s1">&#39;ODS&#39;</span><span class="p">,</span>
<span class="n">table_id</span><span class="o">=</span><span class="s1">&#39;Employees&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;internal-gcp-project&#39;</span><span class="p">,</span>
<span class="n">gcs_schema_object</span><span class="o">=</span><span class="s1">&#39;gs://schema-bucket/employee_schema.json&#39;</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
<p><strong>Corresponding Schema file</strong> (<code class="docutils literal notranslate"><span class="pre">employee_schema.json</span></code>):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span>
<span class="p">{</span>
<span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">,</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">,</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span>
<span class="p">}</span>
<span class="p">]</span>
</pre></div>
</div>
<p><strong>Example (with schema in the DAG)</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;BigQueryCreateEmptyTableOperator_task&#39;</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="o">=</span><span class="s1">&#39;ODS&#39;</span><span class="p">,</span>
<span class="n">table_id</span><span class="o">=</span><span class="s1">&#39;Employees&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;internal-gcp-project&#39;</span><span class="p">,</span>
<span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">}],</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquerycreateexternaltableoperator">
<span id="id28"></span><h5>BigQueryCreateExternalTableOperator<a class="headerlink" href="#bigquerycreateexternaltableoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateExternalTableOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>source_objects</em>, <em>destination_project_dataset_table</em>, <em>schema_fields=None</em>, <em>schema_object=None</em>, <em>source_format='CSV'</em>, <em>compression='NONE'</em>, <em>skip_leading_rows=0</em>, <em>field_delimiter='</em>, <em>'</em>, <em>max_bad_records=0</em>, <em>quote_character=None</em>, <em>allow_quoted_newlines=False</em>, <em>allow_jagged_rows=False</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>src_fmt_configs={}</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new external table in the dataset with the data in Google Cloud
Storage.</p>
<p>The schema to be used for the BigQuery table may be specified in one of
two ways. You may either directly pass the schema fields in, or you may
point the operator to a Google cloud storage object name. The object in
Google cloud storage must be a JSON file with the schema fields in it.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to point the external table to. (templated)</li>
<li><strong>source_objects</strong> (<em>list</em>) – List of Google cloud storage URIs to point
table to. (templated)
If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into (templated). If &lt;project&gt; is not included,
project will be the project defined in the connection json.</li>
<li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">}]</span>
</pre></div>
</div>
<p>Should not be set when source_format is ‘DATASTORE_BACKUP’.</p>
</li>
<li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that
contains the schema for the table. (templated)</li>
<li><strong>source_format</strong> (<em>string</em>) – File format of the data.</li>
<li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source.
Possible values include GZIP and NONE.
The default value is NONE.
This setting is ignored for Google Cloud Bigtable,
Google Cloud Datastore backups and Avro formats.</li>
<li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li>
<li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use for the CSV.</li>
<li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can
ignore when running the job.</li>
<li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li>
<li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li>
<li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns.
The missing values are treated as nulls. If false, records with missing trailing
columns are treated as bad records, and if there are too many bad records, an
invalid error is returned in the job result. Only applicable to CSV, ignored
for other formats.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google
cloud storage hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to
work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the table, passed to BigQuery</li>
</ul>
</td>
</tr>
</tbody>
</table>
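<p><strong>Example</strong> (a minimal, illustrative sketch; the bucket, object paths, table name and
connection IDs are placeholders, and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_operator import BigQueryCreateExternalTableOperator

# Illustrative sketch: bucket, objects, table and connection IDs are placeholders.
create_external_table = BigQueryCreateExternalTableOperator(
    task_id='create_external_sales_table',
    bucket='my-landing-bucket',
    source_objects=['exports/sales/part-*.csv'],
    destination_project_dataset_table='my-project.staging.sales_external',
    schema_object='schemas/sales_schema.json',  # JSON schema file stored in the same bucket
    source_format='CSV',
    skip_leading_rows=1,
    bigquery_conn_id='bigquery_default',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>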
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquerydeletedatasetoperator">
<span id="id29"></span><h5>BigQueryDeleteDatasetOperator<a class="headerlink" href="#bigquerydeletedatasetoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryDeleteDatasetOperator</code><span class="sig-paren">(</span><em>dataset_id</em>, <em>project_id=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>This operator deletes an existing dataset from your project in BigQuery.
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The project id of the dataset.</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset to be deleted.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">delete_temp_data</span> <span class="o">=</span> <span class="n">BigQueryDeleteDatasetOperator</span><span class="p">(</span>
<span class="n">dataset_id</span> <span class="o">=</span> <span class="s1">&#39;temp-dataset&#39;</span><span class="p">,</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="s1">&#39;temp-project&#39;</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;_my_gcp_conn_&#39;</span><span class="p">,</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;Deletetemp&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigqueryoperator">
<span id="id30"></span><h5>BigQueryOperator<a class="headerlink" href="#bigqueryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryOperator</code><span class="sig-paren">(</span><em>bql=None</em>, <em>sql=None</em>, <em>destination_dataset_table=False</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>allow_large_results=False</em>, <em>flatten_results=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>udf_config=False</em>, <em>use_legacy_sql=True</em>, <em>maximum_billing_tier=None</em>, <em>maximum_bytes_billed=None</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>schema_update_options=()</em>, <em>query_params=None</em>, <em>labels=None</em>, <em>priority='INTERACTIVE'</em>, <em>time_partitioning={}</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Executes BigQuery SQL queries in a specific BigQuery database</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bql</strong> (<em>Can receive a str representing a sql statement</em><em>,
</em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file.
Template references are recognized by str ending in '.sql'.</em>) – (Deprecated. Use <cite>sql</cite> parameter instead) the sql code to be
executed (templated)</li>
<li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>,
</em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file.
Template references are recognized by str ending in '.sql'.</em>) – the sql code to be executed (templated)</li>
<li><strong>destination_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that, if set, will store the results
of the query. (templated)</li>
<li><strong>write_disposition</strong> (<em>string</em>) – Specifies the action that occurs if the destination table
already exists. (default: ‘WRITE_EMPTY’)</li>
<li><strong>create_disposition</strong> (<em>string</em>) – Specifies whether the job is allowed to create new tables.
(default: ‘CREATE_IF_NEEDED’)</li>
<li><strong>allow_large_results</strong> (<em>boolean</em>) – Whether to allow large results.</li>
<li><strong>flatten_results</strong> (<em>boolean</em>) – If true and query uses legacy SQL dialect, flattens
all nested and repeated fields in the query results. <code class="docutils literal notranslate"><span class="pre">allow_large_results</span></code>
must be <code class="docutils literal notranslate"><span class="pre">true</span></code> if this is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>. For standard SQL queries, this
flag is ignored and results are never flattened.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>udf_config</strong> (<em>list</em>) – The User Defined Function configuration for the query.
See <a class="reference external" href="https://cloud.google.com/bigquery/user-defined-functions">https://cloud.google.com/bigquery/user-defined-functions</a> for details.</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) or standard SQL (false).</li>
<li><strong>maximum_billing_tier</strong> (<em>integer</em>) – Positive integer that serves as a multiplier
of the basic price.
Defaults to None, in which case it uses the value set in the project.</li>
<li><strong>maximum_bytes_billed</strong> (<em>float</em>) – Limits the bytes billed for this job.
Queries that will have bytes billed beyond this limit will fail
(without incurring a charge). If unspecified, this will be
set to your project default.</li>
<li><strong>schema_update_options</strong> (<em>tuple</em>) – Allows the schema of the destination
table to be updated as a side effect of the load job.</li>
<li><strong>query_params</strong> (<em>dict</em>) – a dictionary containing query parameter types and
values, passed to BigQuery.</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query,
passed to BigQuery</li>
<li><strong>priority</strong> (<em>string</em>) – Specifies a priority for the query.
Possible values include INTERACTIVE and BATCH.
The default value is INTERACTIVE.</li>
<li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e.
partition by field, type and expiration as per API specifications.</li>
</ul>
</td>
</tr>
</tbody>
</table>
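<p><strong>Example</strong> (a minimal, illustrative sketch; the query, table names and connection ID are
placeholders, and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_operator import BigQueryOperator

# Illustrative sketch: runs a standard SQL query and writes the result to a table.
aggregate_sales = BigQueryOperator(
    task_id='aggregate_sales',
    sql='SELECT sale_date, SUM(amount) AS total FROM `my-project.sales.orders` GROUP BY sale_date',
    destination_dataset_table='my-project.reporting.daily_sales',
    write_disposition='WRITE_TRUNCATE',
    use_legacy_sql=False,
    bigquery_conn_id='bigquery_default',
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>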
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator.on_kill">
<code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator.on_kill" title="Permalink to this definition"></a></dt>
<dd><p>Override this method to cleanup subprocesses when a task instance
gets killed. Any use of the threading, subprocess or multiprocessing
module within an operator needs to be cleaned up or it will leave
ghost processes behind.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquerytabledeleteoperator">
<span id="id31"></span><h5>BigQueryTableDeleteOperator<a class="headerlink" href="#bigquerytabledeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_table_delete_operator.</code><code class="descname">BigQueryTableDeleteOperator</code><span class="sig-paren">(</span><em>deletion_dataset_table</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>ignore_if_missing=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Deletes BigQuery tables</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>deletion_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that indicates which table
will be deleted. (templated)</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>ignore_if_missing</strong> (<em>boolean</em>) – if True, then return success even if the
requested table does not exist.</li>
</ul>
</td>
</tr>
</tbody>
</table>
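<p><strong>Example</strong> (an illustrative sketch; the table name and connection ID are placeholders,
and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_table_delete_operator import BigQueryTableDeleteOperator

# Illustrative sketch: drops a staging table, succeeding even if it is already gone.
delete_staging_table = BigQueryTableDeleteOperator(
    task_id='delete_staging_table',
    deletion_dataset_table='my-project.staging.sales_tmp',
    ignore_if_missing=True,
    bigquery_conn_id='bigquery_default',
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>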
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquerytobigqueryoperator">
<span id="id32"></span><h5>BigQueryToBigQueryOperator<a class="headerlink" href="#bigquerytobigqueryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_bigquery.</code><code class="descname">BigQueryToBigQueryOperator</code><span class="sig-paren">(</span><em>source_project_dataset_tables</em>, <em>destination_project_dataset_table</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies data from one BigQuery table to another.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more details about these parameters:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_project_dataset_tables</strong> (<em>list|string</em>) – One or more
dotted (project:<a href="#id33"><span class="problematic" id="id34">|</span></a>project.)&lt;dataset&gt;.&lt;table&gt; BigQuery tables to use as the
source data. If &lt;project&gt; is not included, project will be the
project defined in the connection json. Use a list if there are multiple
source tables. (templated)</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The destination BigQuery
table. Format is: (project:<a href="#id35"><span class="problematic" id="id36">|</span></a>project.)&lt;dataset&gt;.&lt;table&gt; (templated)</li>
<li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li>
<li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query,
passed to BigQuery</li>
</ul>
</td>
</tr>
</tbody>
</table>
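<p><strong>Example</strong> (an illustrative sketch; the table names and connection ID are placeholders,
and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_to_bigquery import BigQueryToBigQueryOperator

# Illustrative sketch: copies one table to another within the same project.
copy_sales_table = BigQueryToBigQueryOperator(
    task_id='copy_sales_table',
    source_project_dataset_tables='my-project.staging.sales',
    destination_project_dataset_table='my-project.archive.sales',
    write_disposition='WRITE_EMPTY',
    create_disposition='CREATE_IF_NEEDED',
    bigquery_conn_id='bigquery_default',
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>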
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquerytocloudstorageoperator">
<span id="id37"></span><h5>BigQueryToCloudStorageOperator<a class="headerlink" href="#bigquerytocloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_gcs.</code><code class="descname">BigQueryToCloudStorageOperator</code><span class="sig-paren">(</span><em>source_project_dataset_table</em>, <em>destination_cloud_storage_uris</em>, <em>compression='NONE'</em>, <em>export_format='CSV'</em>, <em>field_delimiter='</em>, <em>'</em>, <em>print_header=True</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Transfers a BigQuery table to a Google Cloud Storage bucket.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more details about these parameters:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">https://cloud.google.com/bigquery/docs/reference/v2/jobs</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_project_dataset_table</strong> (<em>string</em>) – The dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; BigQuery table to use as the source
data. If &lt;project&gt; is not included, project will be the project
defined in the connection json. (templated)</li>
<li><strong>destination_cloud_storage_uris</strong> (<em>list</em>) – The destination Google Cloud
Storage URI (e.g. gs://some-bucket/some-file.txt). (templated) Follows
convention defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple">https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple</a></li>
<li><strong>compression</strong> (<em>string</em>) – Type of compression to use.</li>
<li><strong>export_format</strong> (<em>string</em>) – File format to export.</li>
<li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when extracting to a CSV.</li>
<li><strong>print_header</strong> (<em>boolean</em>) – Whether to print a header for a CSV file extract.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query,
passed to BigQuery</li>
</ul>
</td>
</tr>
</tbody>
</table>
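<p><strong>Example</strong> (an illustrative sketch; the table, bucket and connection ID are placeholders,
and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.bigquery_to_gcs import BigQueryToCloudStorageOperator

# Illustrative sketch: exports a table to sharded CSV files in Cloud Storage.
export_daily_sales = BigQueryToCloudStorageOperator(
    task_id='export_daily_sales',
    source_project_dataset_table='my-project.reporting.daily_sales',
    destination_cloud_storage_uris=['gs://my-export-bucket/daily_sales/part-*.csv'],
    export_format='CSV',
    field_delimiter=',',
    print_header=True,
    bigquery_conn_id='bigquery_default',
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>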
<dl class="method">
<dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="bigqueryhook">
<h4>BigQueryHook<a class="headerlink" href="#bigqueryhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.bigquery_hook.</code><code class="descname">BigQueryHook</code><span class="sig-paren">(</span><em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>use_legacy_sql=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a>, <a class="reference internal" href="code.html#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p>
<p>Interact with BigQuery. This hook uses the Google Cloud Platform
connection.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a BigQuery PEP 249 connection object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df">
<code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em>, <em>dialect=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Pandas DataFrame for the results produced by a BigQuery
query. The DbApiHook method must be overridden because Pandas
doesn’t support PEP 249 connections, except for SQLite. See:</p>
<p><a class="reference external" href="https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447">https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447</a>
<a class="reference external" href="https://github.com/pydata/pandas/issues/6900">https://github.com/pydata/pandas/issues/6900</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>string</em>) – The BigQuery SQL to execute.</li>
<li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with (not
used, leave to override superclass method)</li>
<li><strong>dialect</strong> (<em>string in {'legacy'</em><em>, </em><em>'standard'}</em>) – Dialect of BigQuery SQL (legacy SQL or standard SQL);
defaults to <cite>self.use_legacy_sql</cite> if not specified</li>
</ul>
</td>
</tr>
</tbody>
</table>
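<p><strong>Example</strong> (an illustrative sketch, e.g. inside a <code class="docutils literal notranslate"><span class="pre">PythonOperator</span></code> callable; the
connection ID and table name are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.bigquery_hook import BigQueryHook

# Illustrative sketch: fetch query results into a Pandas DataFrame.
hook = BigQueryHook(bigquery_conn_id='bigquery_default', use_legacy_sql=False)
df = hook.get_pandas_df(
    sql='SELECT name, salary FROM `my-project.hr.employees` LIMIT 10',
    dialect='standard',
)
print(df.head())
</pre></div>
</div>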
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service">
<code class="descname">get_service</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_service"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service" title="Permalink to this definition"></a></dt>
<dd><p>Returns a BigQuery service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows">
<code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows" title="Permalink to this definition"></a></dt>
<dd><p>Insertion is currently unsupported. Theoretically, you could use
BigQuery’s streaming API to insert rows into a table, but this hasn’t
been implemented.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists">
<code class="descname">table_exists</code><span class="sig-paren">(</span><em>project_id</em>, <em>dataset_id</em>, <em>table_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists" title="Permalink to this definition"></a></dt>
<dd><p>Checks for the existence of a table in Google BigQuery.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google cloud project in which to look for the
table. The connection supplied to the hook must provide access to
the specified project.</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The name of the dataset in which to look for the
table.</li>
<li><strong>table_id</strong> (<em>string</em>) – The name of the table to check the existence of.</li>
</ul>
</td>
</tr>
</tbody>
</table>
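<p><strong>Example</strong> (an illustrative sketch; the project, dataset and table names are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.bigquery_hook import BigQueryHook

# Illustrative sketch: check whether a table exists before reading from it.
hook = BigQueryHook(bigquery_conn_id='bigquery_default')
if hook.table_exists(project_id='my-project',
                     dataset_id='hr',
                     table_id='employees'):
    print('Table exists')
</pre></div>
</div>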
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-sql">
<h3>Cloud SQL<a class="headerlink" href="#cloud-sql" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-sql-operators">
<h4>Cloud SQL Operators<a class="headerlink" href="#cloud-sql-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><span class="xref std std-ref">CloudSqlInstanceDatabaseDeleteOperator</span> : deletes a database from a Cloud SQL</li>
</ul>
<p>instance.
- <span class="xref std std-ref">CloudSqlInstanceDatabaseCreateOperator</span> : creates a new database inside a Cloud
SQL instance.
- <span class="xref std std-ref">CloudSqlInstanceDatabasePatchOperator</span> : updates a database inside a Cloud
SQL instance.
- <span class="xref std std-ref">CloudSqlInstanceDeleteOperator</span> : delete a Cloud SQL instance.
- <a class="reference internal" href="howto/operator.html#cloudsqlinstancecreateoperator"><span class="std std-ref">CloudSqlInstanceCreateOperator</span></a> : create a new Cloud SQL instance.
- <a class="reference internal" href="howto/operator.html#cloudsqlinstancepatchoperator"><span class="std std-ref">CloudSqlInstancePatchOperator</span></a> : patch a Cloud SQL instance.</p>
<div class="section" id="cloudsqlinstancedatabasedeleteoperator">
<h5>CloudSqlInstanceDatabaseDeleteOperator<a class="headerlink" href="#cloudsqlinstancedatabasedeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseDeleteOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>database</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Deletes a database from a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
</ul>
</td>
</tr>
</tbody>
</table>
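<p><strong>Example</strong> (an illustrative sketch; the project, instance and database names are placeholders,
and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDatabaseDeleteOperator

# Illustrative sketch: removes a scratch database from a Cloud SQL instance.
sql_db_delete_task = CloudSqlInstanceDatabaseDeleteOperator(
    task_id='sql_db_delete_task',
    project_id='my-project',
    instance='my-sql-instance',
    database='scratch_db',
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>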
<dl class="method">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="cloudsqlinstancedatabasecreateoperator">
<h5>CloudSqlInstanceDatabaseCreateOperator<a class="headerlink" href="#cloudsqlinstancedatabasecreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseCreateOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Creates a new database inside a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
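<p><strong>Example</strong> (an illustrative sketch; the project, instance and database names are placeholders,
the body follows the linked <code class="docutils literal notranslate"><span class="pre">databases/insert</span></code> request-body shape, and
<code class="docutils literal notranslate"><span class="pre">dag</span></code> is assumed to be defined elsewhere):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDatabaseCreateOperator

# Illustrative request body for the new database (see the databases/insert docs).
db_create_body = {
    'instance': 'my-sql-instance',
    'name': 'reporting_db',
    'project': 'my-project',
}

sql_db_create_task = CloudSqlInstanceDatabaseCreateOperator(
    task_id='sql_db_create_task',
    project_id='my-project',
    instance='my-sql-instance',
    body=db_create_body,
    dag=dag,  # assumes a DAG object defined elsewhere
)
</pre></div>
</div>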
<dl class="method">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="cloudsqlinstancedatabasepatchoperator">
<h5>CloudSqlInstanceDatabasePatchOperator<a class="headerlink" href="#cloudsqlinstancedatabasepatchoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabasePatchOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>database</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabasePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Updates a resource containing information about a database inside a Cloud SQL
instance using patch semantics.
See: <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body</a></li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabasePatchOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
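<p>A usage sketch, assuming placeholder project and instance IDs; only the fields to be patched go into the body:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDatabasePatchOperator

dag = DAG('example_gcp_sql_db_patch', start_date=datetime(2018, 1, 1), schedule_interval=None)

patch_db = CloudSqlInstanceDatabasePatchOperator(
    task_id='patch_database',
    project_id='my-gcp-project',   # placeholder project
    instance='my-sql-instance',    # instance ID only
    database='my_database',        # database to update
    body={'charset': 'utf16'},     # illustrative field to patch
    dag=dag,
)
</pre></div>
</div>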
</div>
<div class="section" id="cloudsqlinstancedeleteoperator">
<h5>CloudSqlInstanceDeleteOperator<a class="headerlink" href="#cloudsqlinstancedeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDeleteOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Deletes a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance to be deleted.</li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
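<p>Example usage (sketch only, with placeholder IDs):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDeleteOperator

dag = DAG('example_gcp_sql_delete', start_date=datetime(2018, 1, 1), schedule_interval=None)

delete_instance = CloudSqlInstanceDeleteOperator(
    task_id='delete_sql_instance',
    project_id='my-gcp-project',   # placeholder project
    instance='my-sql-instance',    # instance ID only, without the project ID
    dag=dag,
)
</pre></div>
</div>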
</div>
<div class="section" id="cloudsqlinstancecreateoperator">
<h5>CloudSqlInstanceCreateOperator<a class="headerlink" href="#cloudsqlinstancecreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceCreateOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em>, <em>instance</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Creates a new Cloud SQL instance.
If an instance with the same name exists, no action will be taken and
the operator will succeed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project to which the newly created Cloud SQL
instances should belong.</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert</a>
#request-body</li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
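<p>A minimal sketch of creating an instance; the body below is an illustrative subset of the insert request body linked above, and all names are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceCreateOperator

dag = DAG('example_gcp_sql_create', start_date=datetime(2018, 1, 1), schedule_interval=None)

create_instance = CloudSqlInstanceCreateOperator(
    task_id='create_sql_instance',
    project_id='my-gcp-project',                   # placeholder project
    instance='my-sql-instance',                    # instance ID only
    body={
        'name': 'my-sql-instance',
        'settings': {'tier': 'db-n1-standard-1'},  # illustrative subset of the insert body
    },
    dag=dag,
)
</pre></div>
</div>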
</div>
<div class="section" id="cloudsqlinstancepatchoperator">
<h5>CloudSqlInstancePatchOperator<a class="headerlink" href="#cloudsqlinstancepatchoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstancePatchOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em>, <em>instance</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstancePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Updates settings of a Cloud SQL instance.</p>
<p>Caution: This is a partial update, so only the settings values included in the body
will be updated; the remaining settings stay unchanged.</p>
<p>In the request body, supply the relevant portions of an instance resource, according
to the rules of patch semantics.
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a></li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstancePatchOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
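<p>An illustrative snippet, assuming placeholder names; per the patch semantics above, only the settings included in the body are changed:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstancePatchOperator

dag = DAG('example_gcp_sql_patch', start_date=datetime(2018, 1, 1), schedule_interval=None)

patch_instance = CloudSqlInstancePatchOperator(
    task_id='patch_sql_instance',
    project_id='my-gcp-project',                         # placeholder project
    instance='my-sql-instance',                          # instance ID only
    body={'settings': {'userLabels': {'env': 'test'}}},  # illustrative settings to patch
    dag=dag,
)
</pre></div>
</div>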
</div>
</div>
<div class="section" id="cloud-sql-hook">
<h4>Cloud SQL Hook<a class="headerlink" href="#cloud-sql-hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Hook for Google Cloud SQL APIs.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database">
<code class="descname">create_database</code><span class="sig-paren">(</span><em>project</em>, <em>instance</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new database inside a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance">
<code class="descname">create_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project to which the newly created
Cloud SQL instances should belong.</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a></li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database">
<code class="descname">delete_database</code><span class="sig-paren">(</span><em>project</em>, <em>instance</em>, <em>database</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database" title="Permalink to this definition"></a></dt>
<dd><p>Deletes a database from a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance">
<code class="descname">delete_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance" title="Permalink to this definition"></a></dt>
<dd><p>Deletes a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves connection to Cloud SQL.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud SQL services object.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database">
<code class="descname">get_database</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>database</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves a database resource from a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database in the instance.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL database resource, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource</a></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance">
<code class="descname">get_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves a resource containing information about a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL instance resource.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database">
<code class="descname">patch_database</code><span class="sig-paren">(</span><em>project</em>, <em>instance</em>, <em>database</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database" title="Permalink to this definition"></a></dt>
<dd><p>Updates a database resource inside a Cloud SQL instance.
This method supports patch semantics.
See: <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance">
<code class="descname">patch_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em>, <em>instance</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance" title="Permalink to this definition"></a></dt>
<dd><p>Updates settings of a Cloud SQL instance.</p>
<p>Caution: This is a partial update, so only the settings specified in the body are
changed; values that are not included in the body are retained.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a></li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
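<p>The hook can also be called directly, for example from a <code class="docutils literal notranslate"><span class="pre">PythonOperator</span></code> callable. A sketch, assuming the default <code class="docutils literal notranslate"><span class="pre">google_cloud_default</span></code> connection and placeholder project/instance names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.gcp_sql_hook import CloudSqlHook

hook = CloudSqlHook(api_version='v1beta4', gcp_conn_id='google_cloud_default')

# Retrieve the instance resource, then add a database to it.
instance = hook.get_instance(project_id='my-gcp-project', instance='my-sql-instance')
hook.create_database(project='my-gcp-project',   # note: this method takes 'project'
                     instance='my-sql-instance',
                     body={'name': 'my_database'})
</pre></div>
</div>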
</div>
</div>
<div class="section" id="compute-engine">
<h3>Compute Engine<a class="headerlink" href="#compute-engine" title="Permalink to this headline"></a></h3>
<div class="section" id="compute-engine-operators">
<h4>Compute Engine Operators<a class="headerlink" href="#compute-engine-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#gceinstancestartoperator"><span class="std std-ref">GceInstanceStartOperator</span></a> : start an existing Google Compute Engine instance.</li>
<li><a class="reference internal" href="#gceinstancestopoperator"><span class="std std-ref">GceInstanceStopOperator</span></a> : stop an existing Google Compute Engine instance.</li>
<li><a class="reference internal" href="#gcesetmachinetypeoperator"><span class="std std-ref">GceSetMachineTypeOperator</span></a> : change the machine type for a stopped instance.</li>
</ul>
<div class="section" id="gceinstancestartoperator">
<span id="id38"></span><h5>GceInstanceStartOperator<a class="headerlink" href="#gceinstancestartoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStartOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>zone</em>, <em>resource_id</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStartOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></p>
<p>Start an instance in Google Compute Engine.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform project where the Compute Engine
instance exists.</li>
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1).</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStartOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
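<p>A minimal usage sketch (project, zone and instance name are placeholders):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_compute_operator import GceInstanceStartOperator

dag = DAG('example_gce_start', start_date=datetime(2018, 1, 1), schedule_interval=None)

start_instance = GceInstanceStartOperator(
    task_id='gce_start',
    project_id='my-gcp-project',   # placeholder project
    zone='europe-west1-b',         # placeholder zone
    resource_id='my-instance',     # name of the Compute Engine instance
    dag=dag,
)
</pre></div>
</div>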
</div>
<div class="section" id="gceinstancestopoperator">
<span id="id39"></span><h5>GceInstanceStopOperator<a class="headerlink" href="#gceinstancestopoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStopOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>zone</em>, <em>resource_id</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></p>
<p>Stop an instance in Google Compute Engine.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform project where the Compute Engine
instance exists.</li>
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1).</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStopOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
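<p>Usage mirrors the start operator; a sketch with placeholder values:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_compute_operator import GceInstanceStopOperator

dag = DAG('example_gce_stop', start_date=datetime(2018, 1, 1), schedule_interval=None)

stop_instance = GceInstanceStopOperator(
    task_id='gce_stop',
    project_id='my-gcp-project',   # placeholder project
    zone='europe-west1-b',         # placeholder zone
    resource_id='my-instance',     # name of the Compute Engine instance
    dag=dag,
)
</pre></div>
</div>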
</div>
<div class="section" id="gcesetmachinetypeoperator">
<span id="id40"></span><h5>GceSetMachineTypeOperator<a class="headerlink" href="#gcesetmachinetypeoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceSetMachineTypeOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>zone</em>, <em>resource_id</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceSetMachineTypeOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></p>
<p>Changes the machine type for a stopped instance to the machine type specified in
the request.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform project where the Compute Engine
instance exists.</li>
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Compute Engine setMachineType API, as described in
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body">https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body</a></li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1).</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceSetMachineTypeOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
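<p>An illustrative snippet; the machineType value in the body follows the setMachineType request format linked above, and all names are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_compute_operator import GceSetMachineTypeOperator

dag = DAG('example_gce_set_machine_type', start_date=datetime(2018, 1, 1), schedule_interval=None)

set_machine_type = GceSetMachineTypeOperator(
    task_id='gce_set_machine_type',
    project_id='my-gcp-project',   # placeholder project
    zone='europe-west1-b',         # placeholder zone
    resource_id='my-instance',     # the instance must already be stopped
    body={'machineType': 'zones/europe-west1-b/machineTypes/n1-standard-2'},
    dag=dag,
)
</pre></div>
</div>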
</div>
</div>
</div>
<div class="section" id="cloud-functions">
<h3>Cloud Functions<a class="headerlink" href="#cloud-functions" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-functions-operators">
<h4>Cloud Functions Operators<a class="headerlink" href="#cloud-functions-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#gcffunctiondeployoperator"><span class="std std-ref">GcfFunctionDeployOperator</span></a> : deploy Google Cloud Function to Google Cloud Platform</li>
<li><a class="reference internal" href="#gcffunctiondeleteoperator"><span class="std std-ref">GcfFunctionDeleteOperator</span></a> : delete Google Cloud Function in Google Cloud Platform</li>
</ul>
<div class="section" id="gcffunctiondeployoperator">
<span id="id41"></span><h5>GcfFunctionDeployOperator<a class="headerlink" href="#gcffunctiondeployoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeployOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>location</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>zip_path=None</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeployOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a function in Google Cloud Functions.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform Project ID where the function should
be created.</li>
<li><strong>location</strong> (<em>str</em>) – Google Cloud Platform region where the function should be created.</li>
<li><strong>body</strong> (<em>dict</em><em> or </em><em>google.cloud.functions.v1.CloudFunction</em>) – Body of the Cloud Functions definition. The body must be a
Cloud Functions dictionary as described in:
<a class="reference external" href="https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions">https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions</a>
. Different API versions require different variants of the Cloud Functions
dictionary.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID to use to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (for example v1 or v1beta1).</li>
<li><strong>zip_path</strong> (<em>str</em>) – Path to zip file containing source code of the function. If the path
is set, the sourceUploadUrl should not be specified in the body or it should
be empty. Then the zip file will be uploaded using the upload URL generated
via generateUploadUrl from the Cloud Functions API.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – If set to False, body validation is not performed.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeployOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
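<p>A hedged example of deploying a function from a local zip file; the body is an illustrative subset of the Cloud Functions dictionary and all identifiers are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_function_operator import GcfFunctionDeployOperator

dag = DAG('example_gcf_deploy', start_date=datetime(2018, 1, 1), schedule_interval=None)

deploy_function = GcfFunctionDeployOperator(
    task_id='gcf_deploy',
    project_id='my-gcp-project',       # placeholder project
    location='europe-west1',           # placeholder region
    body={
        'name': 'projects/my-gcp-project/locations/europe-west1/functions/my-function',
        'entryPoint': 'hello_world',   # illustrative subset of the
        'runtime': 'python37',         # Cloud Functions dictionary
        'httpsTrigger': {},
    },
    zip_path='/tmp/my-function.zip',   # source uploaded via generateUploadUrl
    dag=dag,
)
</pre></div>
</div>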
</div>
<div class="section" id="gcffunctiondeleteoperator">
<span id="id42"></span><h5>GcfFunctionDeleteOperator<a class="headerlink" href="#gcffunctiondeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeleteOperator</code><span class="sig-paren">(</span><em>name</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Deletes the specified function from Google Cloud Functions.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>name</strong> (<em>str</em>) – A fully-qualified function name, matching
the pattern: <cite>^projects/[^/]+/locations/[^/]+/functions/[^/]+$</cite></li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID to use to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (for example v1 or v1beta1).</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
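<p>A minimal sketch; the fully-qualified name follows the pattern documented above, with placeholder project, region and function names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_function_operator import GcfFunctionDeleteOperator

dag = DAG('example_gcf_delete', start_date=datetime(2018, 1, 1), schedule_interval=None)

delete_function = GcfFunctionDeleteOperator(
    task_id='gcf_delete',
    name='projects/my-gcp-project/locations/europe-west1/functions/my-function',
    dag=dag,
)
</pre></div>
</div>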
</div>
</div>
<div class="section" id="cloud-functions-hook">
<h4>Cloud Functions Hook<a class="headerlink" href="#cloud-functions-hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_function_hook.</code><code class="descname">GcfHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Hook for the Google Cloud Functions APIs.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function">
<code class="descname">create_new_function</code><span class="sig-paren">(</span><em>full_location</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.create_new_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new function in Cloud Functions in the location specified in the body.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>full_location</strong> (<em>str</em>) – full location including the project, in the form of
/projects/&lt;PROJECT&gt;/location/&lt;LOCATION&gt;</li>
<li><strong>body</strong> (<em>dict</em>) – body required by the Cloud Functions insert API</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">response returned by the operation</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function">
<code class="descname">delete_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.delete_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the specified Cloud Function.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – name of the function</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">response returned by the operation</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves the connection to Cloud Functions.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud Function services object</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function">
<code class="descname">get_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function" title="Permalink to this definition"></a></dt>
<dd><p>Returns the Cloud Function with the given name.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – name of the function</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a CloudFunction object representing the function</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.list_functions">
<code class="descname">list_functions</code><span class="sig-paren">(</span><em>full_location</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.list_functions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.list_functions" title="Permalink to this definition"></a></dt>
<dd><p>Lists all Cloud Functions created in the location.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>full_location</strong> (<em>str</em>) – full location including the project in the form of
of /projects/&lt;PROJECT&gt;/location/&lt;LOCATION&gt;</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">array of CloudFunction objects - representing functions in the location</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">[dict]</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function">
<code class="descname">update_function</code><span class="sig-paren">(</span><em>name</em>, <em>body</em>, <em>update_mask</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.update_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function" title="Permalink to this definition"></a></dt>
<dd><p>Updates Cloud Functions according to the specified update mask.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – name of the function</li>
<li><strong>body</strong> (<em>dict</em>) – body required by the Cloud Functions patch API</li>
<li><strong>update_mask</strong> (<em>[</em><em>str</em><em>]</em>) – update mask - array of fields that should be patched</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">response returned by the operation</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip">
<code class="descname">upload_function_zip</code><span class="sig-paren">(</span><em>parent</em>, <em>zip_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.upload_function_zip"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip" title="Permalink to this definition"></a></dt>
<dd><p>Uploads zip file with sources.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>parent</strong> (<em>str</em>) – Google Cloud Platform project id and region where zip file should
be uploaded in the form of /projects/&lt;PROJECT&gt;/location/&lt;LOCATION&gt;</li>
<li><strong>zip_path</strong> (<em>str</em>) – path of the valid .zip file to upload</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">Upload URL that was returned by generateUploadUrl method</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
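<p>A short sketch of calling the hook directly; the project, region and function names are placeholders and the patched field is only an example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.gcp_function_hook import GcfHook

hook = GcfHook(api_version='v1', gcp_conn_id='google_cloud_default')

# Fetch a single function by its fully-qualified name.
fn = hook.get_function(
    'projects/my-gcp-project/locations/europe-west1/functions/my-function')

# Patch selected fields of the function definition.
hook.update_function(
    name='projects/my-gcp-project/locations/europe-west1/functions/my-function',
    body={'availableMemoryMb': 256},
    update_mask=['availableMemoryMb'])
</pre></div>
</div>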
</div>
</div>
<div class="section" id="cloud-dataflow">
<h3>Cloud DataFlow<a class="headerlink" href="#cloud-dataflow" title="Permalink to this headline"></a></h3>
<div class="section" id="dataflow-operators">
<h4>DataFlow Operators<a class="headerlink" href="#dataflow-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#dataflowjavaoperator"><span class="std std-ref">DataFlowJavaOperator</span></a> : launching Cloud Dataflow jobs written in Java.</li>
<li><a class="reference internal" href="#dataflowtemplateoperator"><span class="std std-ref">DataflowTemplateOperator</span></a> : launching a templated Cloud DataFlow batch job.</li>
<li><a class="reference internal" href="#dataflowpythonoperator"><span class="std std-ref">DataFlowPythonOperator</span></a> : launching Cloud Dataflow jobs written in python.</li>
</ul>
<div class="section" id="dataflowjavaoperator">
<span id="id43"></span><h5>DataFlowJavaOperator<a class="headerlink" href="#dataflowjavaoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowJavaOperator</code><span class="sig-paren">(</span><em>jar</em>, <em>dataflow_default_options=None</em>, <em>options=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em>, <em>job_class=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Java Cloud DataFlow batch job. The parameters of the operation
will be passed to the job.</p>
<p>It’s a good practice to define dataflow_* parameters in the default_args of the dag
like the project, zone and staging location.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;dataflow_default_options&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span><span class="p">:</span> <span class="s1">&#39;my-gcp-project&#39;</span><span class="p">,</span>
<span class="s1">&#39;zone&#39;</span><span class="p">:</span> <span class="s1">&#39;europe-west1-d&#39;</span><span class="p">,</span>
<span class="s1">&#39;stagingLocation&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://my-staging-bucket/staging/&#39;</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>You need to pass the path to your dataflow as a file reference with the <code class="docutils literal notranslate"><span class="pre">jar</span></code>
parameter; the jar needs to be a self-executing jar (see documentation here:
<a class="reference external" href="https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar">https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar</a>).
Use <code class="docutils literal notranslate"><span class="pre">options</span></code> to pass on options to your job.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataFlowOperation</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;datapflow_example&#39;</span><span class="p">,</span>
<span class="n">jar</span><span class="o">=</span><span class="s1">&#39;{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar&#39;</span><span class="p">,</span>
<span class="n">options</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;autoscalingAlgorithm&#39;</span><span class="p">:</span> <span class="s1">&#39;BASIC&#39;</span><span class="p">,</span>
<span class="s1">&#39;maxNumWorkers&#39;</span><span class="p">:</span> <span class="s1">&#39;50&#39;</span><span class="p">,</span>
<span class="s1">&#39;start&#39;</span><span class="p">:</span> <span class="s1">&#39;{{ds}}&#39;</span><span class="p">,</span>
<span class="s1">&#39;partitionType&#39;</span><span class="p">:</span> <span class="s1">&#39;DAY&#39;</span><span class="p">,</span>
<span class="s1">&#39;labels&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;foo&#39;</span> <span class="p">:</span> <span class="s1">&#39;bar&#39;</span><span class="p">}</span>
<span class="p">},</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">&#39;gcp-airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<p>Both <code class="docutils literal notranslate"><span class="pre">jar</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> are templated so you can use variables in them.</p>
<dl class="method">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;owner&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">,</span>
<span class="s1">&#39;depends_on_past&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;start_date&#39;</span><span class="p">:</span>
<span class="p">(</span><span class="mi">2016</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
<span class="s1">&#39;email&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;alex@vanboxel.be&#39;</span><span class="p">],</span>
<span class="s1">&#39;email_on_failure&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;email_on_retry&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;retries&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;retry_delay&#39;</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">30</span><span class="p">),</span>
<span class="s1">&#39;dataflow_default_options&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span><span class="p">:</span> <span class="s1">&#39;my-gcp-project&#39;</span><span class="p">,</span>
<span class="s1">&#39;zone&#39;</span><span class="p">:</span> <span class="s1">&#39;us-central1-f&#39;</span><span class="p">,</span>
<span class="s1">&#39;stagingLocation&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://bucket/tmp/dataflow/staging/&#39;</span><span class="p">,</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">&#39;test-dag&#39;</span><span class="p">,</span> <span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">)</span>
<span class="n">task</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">&#39;gcp_default&#39;</span><span class="p">,</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;normalize-cal&#39;</span><span class="p">,</span>
<span class="n">jar</span><span class="o">=</span><span class="s1">&#39;{{var.value.gcp_dataflow_base}}pipeline-ingress-cal-normalize-1.0.jar&#39;</span><span class="p">,</span>
<span class="n">options</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;autoscalingAlgorithm&#39;</span><span class="p">:</span> <span class="s1">&#39;BASIC&#39;</span><span class="p">,</span>
<span class="s1">&#39;maxNumWorkers&#39;</span><span class="p">:</span> <span class="s1">&#39;50&#39;</span><span class="p">,</span>
<span class="s1">&#39;start&#39;</span><span class="p">:</span> <span class="s1">&#39;{{ds}}&#39;</span><span class="p">,</span>
<span class="s1">&#39;partitionType&#39;</span><span class="p">:</span> <span class="s1">&#39;DAY&#39;</span>
<span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="dataflowtemplateoperator">
<span id="id44"></span><h5>DataflowTemplateOperator<a class="headerlink" href="#dataflowtemplateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataflowTemplateOperator</code><span class="sig-paren">(</span><em>template</em>, <em>dataflow_default_options=None</em>, <em>parameters=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Templated Cloud DataFlow batch job. The parameters of the operation
will be passed to the job.
It’s a good practice to define dataflow_* parameters in the default_args of the dag
like the project, zone and staging location.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a>
<a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a></p>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;dataflow_default_options&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span><span class="p">:</span> <span class="s1">&#39;my-gcp-project&#39;</span>
<span class="s1">&#39;zone&#39;</span><span class="p">:</span> <span class="s1">&#39;europe-west1-d&#39;</span><span class="p">,</span>
<span class="s1">&#39;tempLocation&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://my-staging-bucket/staging/&#39;</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>You need to pass the path to your dataflow template as a file reference with the
<code class="docutils literal notranslate"><span class="pre">template</span></code> parameter. Use <code class="docutils literal notranslate"><span class="pre">parameters</span></code> to pass on parameters to your job.
Use <code class="docutils literal notranslate"><span class="pre">environment</span></code> to pass on runtime environment variables to your job.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataflowTemplateOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;datapflow_example&#39;</span><span class="p">,</span>
<span class="n">template</span><span class="o">=</span><span class="s1">&#39;{{var.value.gcp_dataflow_base}}&#39;</span><span class="p">,</span>
<span class="n">parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;inputFile&#39;</span><span class="p">:</span> <span class="s2">&quot;gs://bucket/input/my_input.txt&quot;</span><span class="p">,</span>
<span class="s1">&#39;outputFile&#39;</span><span class="p">:</span> <span class="s2">&quot;gs://bucket/output/my_output.txt&quot;</span>
<span class="p">},</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">&#39;gcp-airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">template</span></code>, <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">parameters</span></code> are templated so you can
use variables in them.</p>
<dl class="method">
<dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataflowpythonoperator">
<span id="id45"></span><h5>DataFlowPythonOperator<a class="headerlink" href="#dataflowpythonoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowPythonOperator</code><span class="sig-paren">(</span><em>py_file</em>, <em>py_options=None</em>, <em>dataflow_default_options=None</em>, <em>options=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Create a new DataFlowPythonOperator. Note that both
dataflow_default_options and options will be merged to specify pipeline
execution parameter, and dataflow_default_options is expected to save
high-level options, for instances, project and zone information, which
apply to all dataflow operators in the DAG.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on job submission have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p>
</div>
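<p>A minimal sketch of a task definition (the local file path, project and bucket names below are
illustrative; the keys in <code class="docutils literal notranslate"><span class="pre">options</span></code> follow the normal Dataflow pipeline options):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>t2 = DataFlowPythonOperator(
    task_id='dataflow_python_example',
    py_file='/home/airflow/dags/dataflow/wordcount.py',
    dataflow_default_options={
        'project': 'my-gcp-project',
        'staging_location': 'gs://my-staging-bucket/staging/'
    },
    options={
        'output': 'gs://my-bucket/output/wordcount'
    },
    dag=dag)
</pre></div></div>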
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>py_file</strong> (<em>string</em>) – Reference to the python dataflow pipeline file.py, e.g.,
/some/local/file/path/to/your/python/pipeline/file.</li>
<li><strong>py_options</strong> – Additional python options.</li>
<li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li>
<li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud
Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google
Cloud Platform for the dataflow job status while the job is in the
JOB_STATE_RUNNING state.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>Execute the python dataflow job.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="dataflowhook">
<h4>DataFlowHook<a class="headerlink" href="#dataflowhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_dataflow_hook.</code><code class="descname">DataFlowHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google Cloud Dataflow service object.</p>
</dd></dl>
</dd></dl>
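<p>A short usage sketch of the hook (the connection id matches the constructor default; this is an
illustration, not an excerpt from the codebase):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>hook = DataFlowHook(gcp_conn_id='google_cloud_default', poll_sleep=10)
# get_conn() returns the authorized Google Cloud Dataflow service object
dataflow_service = hook.get_conn()
</pre></div></div>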
</div>
</div>
<div class="section" id="cloud-dataproc">
<h3>Cloud DataProc<a class="headerlink" href="#cloud-dataproc" title="Permalink to this headline"></a></h3>
<div class="section" id="dataproc-operators">
<h4>DataProc Operators<a class="headerlink" href="#dataproc-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#dataprocclustercreateoperator"><span class="std std-ref">DataprocClusterCreateOperator</span></a> : Create a new cluster on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocclusterdeleteoperator"><span class="std std-ref">DataprocClusterDeleteOperator</span></a> : Delete a cluster on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocclusterscaleoperator"><span class="std std-ref">DataprocClusterScaleOperator</span></a> : Scale up or down a cluster on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocpigoperator"><span class="std std-ref">DataProcPigOperator</span></a> : Start a Pig query Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprochiveoperator"><span class="std std-ref">DataProcHiveOperator</span></a> : Start a Hive query Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocsparksqloperator"><span class="std std-ref">DataProcSparkSqlOperator</span></a> : Start a Spark SQL query Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocsparkoperator"><span class="std std-ref">DataProcSparkOperator</span></a> : Start a Spark Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprochadoopoperator"><span class="std std-ref">DataProcHadoopOperator</span></a> : Start a Hadoop Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocpysparkoperator"><span class="std std-ref">DataProcPySparkOperator</span></a> : Start a PySpark Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateOperator</span></a> : Instantiate a WorkflowTemplate on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateinlineoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateInlineOperator</span></a> : Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.</li>
</ul>
<div class="section" id="dataprocclustercreateoperator">
<span id="id46"></span><h5>DataprocClusterCreateOperator<a class="headerlink" href="#dataprocclustercreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterCreateOperator</code><span class="sig-paren">(</span><em>cluster_name</em>, <em>project_id</em>, <em>num_workers</em>, <em>zone</em>, <em>network_uri=None</em>, <em>subnetwork_uri=None</em>, <em>internal_ip_only=None</em>, <em>tags=None</em>, <em>storage_bucket=None</em>, <em>init_actions_uris=None</em>, <em>init_action_timeout='10m'</em>, <em>metadata=None</em>, <em>custom_image=None</em>, <em>image_version=None</em>, <em>properties=None</em>, <em>master_machine_type='n1-standard-4'</em>, <em>master_disk_type='pd-standard'</em>, <em>master_disk_size=500</em>, <em>worker_machine_type='n1-standard-4'</em>, <em>worker_disk_type='pd-standard'</em>, <em>worker_disk_size=500</em>, <em>num_preemptible_workers=0</em>, <em>labels=None</em>, <em>region='global'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>service_account=None</em>, <em>service_account_scopes=None</em>, <em>idle_delete_ttl=None</em>, <em>auto_delete_time=None</em>, <em>auto_delete_ttl=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Create a new cluster on Google Cloud Dataproc. The operator will wait until the
creation is successful or an error occurs in the creation process.</p>
<p>The parameters allow you to configure the cluster. Please refer to</p>
<p><a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters</a></p>
<p>for a detailed explanation on the different parameters. Most of the configuration
parameters detailed in the link are available as a parameter to this operator.</p>
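<p>For example, a minimal cluster definition could look like the following (the project, zone and
bucket names are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>create_cluster = DataprocClusterCreateOperator(
    task_id='create_dataproc_cluster',
    project_id='my-gcp-project',
    cluster_name='analytics-cluster-{{ ds_nodash }}',
    num_workers=2,
    zone='europe-west1-d',
    storage_bucket='my-dataproc-staging-bucket',
    dag=dag)
</pre></div></div>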
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster to create. (templated)</li>
<li><strong>project_id</strong> (<em>str</em>) – The ID of the google cloud project in which
to create the cluster. (templated)</li>
<li><strong>num_workers</strong> (<em>int</em>) – The # of workers to spin up. If set to zero, the
cluster will spin up in single node mode</li>
<li><strong>storage_bucket</strong> (<em>string</em>) – The storage bucket to use; setting to None lets dataproc
generate a custom one for you</li>
<li><strong>init_actions_uris</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – List of GCS uri’s containing
dataproc initialization scripts</li>
<li><strong>init_action_timeout</strong> (<em>string</em>) – Amount of time executable scripts in
init_actions_uris have to complete</li>
<li><strong>metadata</strong> (<em>dict</em>) – dict of key-value google compute engine metadata entries
to add to all instances</li>
<li><strong>image_version</strong> (<em>string</em>) – the version of software inside the Dataproc cluster</li>
<li><strong>custom_image</strong> (<em>string</em>) – custom Dataproc image; for more info see
<a class="reference external" href="https://cloud.google.com/dataproc/docs/guides/dataproc-images">https://cloud.google.com/dataproc/docs/guides/dataproc-images</a></li>
<li><strong>properties</strong> (<em>dict</em>) – dict of properties to set on
config files (e.g. spark-defaults.conf), see
<a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/">https://cloud.google.com/dataproc/docs/reference/rest/v1/</a> projects.regions.clusters#SoftwareConfig</li>
<li><strong>master_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the master node</li>
<li><strong>master_disk_type</strong> (<em>string</em>) – Type of the boot disk for the master node
(default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>).
Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or
<code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li>
<li><strong>master_disk_size</strong> (<em>int</em>) – Disk size for the master node</li>
<li><strong>worker_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the worker nodes</li>
<li><strong>worker_disk_type</strong> (<em>string</em>) – Type of the boot disk for the worker node
(default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>).
Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or
<code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li>
<li><strong>worker_disk_size</strong> (<em>int</em>) – Disk size for the worker nodes</li>
<li><strong>num_preemptible_workers</strong> (<em>int</em>) – The # of preemptible worker nodes to spin up</li>
<li><strong>labels</strong> (<em>dict</em>) – dict of labels to add to the cluster</li>
<li><strong>zone</strong> (<em>string</em>) – The zone where the cluster will be located. (templated)</li>
<li><strong>network_uri</strong> (<em>string</em>) – The network uri to be used for machine communication, cannot be
specified with subnetwork_uri</li>
<li><strong>subnetwork_uri</strong> (<em>string</em>) – The subnetwork uri to be used for machine communication,
cannot be specified with network_uri</li>
<li><strong>internal_ip_only</strong> (<em>bool</em>) – If true, all instances in the cluster will only
have internal IP addresses. This can only be enabled for subnetwork
enabled networks</li>
<li><strong>tags</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The GCE tags to add to all instances</li>
<li><strong>region</strong> – leave as ‘global’, might become relevant in the future. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>service_account</strong> (<em>string</em>) – The service account of the dataproc instances.</li>
<li><strong>service_account_scopes</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The URIs of service account scopes to be included.</li>
<li><strong>idle_delete_ttl</strong> (<em>int</em>) – The longest duration that the cluster will stay alive while
idle. Passing this threshold will cause the cluster to be auto-deleted.
A duration in seconds.</li>
<li><strong>auto_delete_time</strong> (<em>datetime.datetime</em>) – The time when cluster will be auto-deleted.</li>
<li><strong>auto_delete_ttl</strong> (<em>int</em>) – The life duration of the cluster; the cluster will be
auto-deleted at the end of this duration.
A duration in seconds. (If auto_delete_time is set, this parameter will be ignored)</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">custom_image: string</p>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocclusterscaleoperator">
<span id="id47"></span><h5>DataprocClusterScaleOperator<a class="headerlink" href="#dataprocclusterscaleoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterScaleOperator</code><span class="sig-paren">(</span><em>cluster_name</em>, <em>project_id</em>, <em>region='global'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>num_workers=2</em>, <em>num_preemptible_workers=0</em>, <em>graceful_decommission_timeout=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Scale, up or down, a cluster on Google Cloud Dataproc.
The operator will wait until the cluster is re-scaled.</p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataprocClusterScaleOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;dataproc_scale&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;my-project&#39;</span><span class="p">,</span>
<span class="n">cluster_name</span><span class="o">=</span><span class="s1">&#39;cluster-1&#39;</span><span class="p">,</span>
<span class="n">num_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">num_preemptible_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">graceful_decommission_timeout</span><span class="o">=</span><span class="s1">&#39;1h&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on about scaling clusters have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters">https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to scale. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which
the cluster runs. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – The region for the dataproc cluster. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>num_workers</strong> (<em>int</em>) – The new number of workers</li>
<li><strong>num_preemptible_workers</strong> (<em>int</em>) – The new number of preemptible workers</li>
<li><strong>graceful_decommission_timeout</strong> (<em>string</em>) – Timeout for graceful YARN decommissioning.
Maximum value is 1d</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocclusterdeleteoperator">
<span id="id48"></span><h5>DataprocClusterDeleteOperator<a class="headerlink" href="#dataprocclusterdeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterDeleteOperator</code><span class="sig-paren">(</span><em>cluster_name</em>, <em>project_id</em>, <em>region='global'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Delete a cluster on Google Cloud Dataproc. The operator will wait until the
cluster is destroyed.</p>
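<p>For example (a minimal sketch with illustrative project and cluster names):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>delete_cluster = DataprocClusterDeleteOperator(
    task_id='delete_dataproc_cluster',
    project_id='my-gcp-project',
    cluster_name='analytics-cluster-{{ ds_nodash }}',
    region='global',
    dag=dag)
</pre></div></div>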
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to delete. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which
the cluster runs. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocpigoperator">
<span id="id49"></span><h5>DataProcPigOperator<a class="headerlink" href="#dataprocpigoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPigOperator</code><span class="sig-paren">(</span><em>query=None</em>, <em>query_uri=None</em>, <em>variables=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_pig_properties=None</em>, <em>dataproc_pig_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation
will be passed to the cluster.</p>
<p>It’s a good practice to define dataproc_* parameters in the default_args of the dag
like the cluster name and UDFs.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;cluster_name&#39;</span><span class="p">:</span> <span class="s1">&#39;cluster-1&#39;</span><span class="p">,</span>
<span class="s1">&#39;dataproc_pig_jars&#39;</span><span class="p">:</span> <span class="p">[</span>
<span class="s1">&#39;gs://example/udf/jar/datafu/1.2.0/datafu.jar&#39;</span><span class="p">,</span>
<span class="s1">&#39;gs://example/udf/jar/gpig/1.2/gpig.jar&#39;</span>
<span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p>You can pass a pig script as a string or a file reference. Use variables to pass
values for the pig script to be resolved on the cluster, or use the parameters to
be resolved in the script as template parameters.</p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataProcPigOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;dataproc_pig&#39;</span><span class="p">,</span>
<span class="n">query</span><span class="o">=</span><span class="s1">&#39;a_pig_script.pig&#39;</span><span class="p">,</span>
<span class="n">variables</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;out&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://example/output/{{ds}}&#39;</span><span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on about job submission have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs">https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>query</strong> (<em>string</em>) – The query or reference to the query
file (pg or pig extension). (templated)</li>
<li><strong>query_uri</strong> (<em>string</em>) – The uri of a pig script on Cloud Storage.</li>
<li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_pig_properties</strong> (<em>dict</em>) – Map for the Pig properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_pig_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for
UDFs and libs) and are ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprochiveoperator">
<span id="id50"></span><h5>DataProcHiveOperator<a class="headerlink" href="#dataprochiveoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHiveOperator</code><span class="sig-paren">(</span><em>query=None</em>, <em>query_uri=None</em>, <em>variables=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_hive_properties=None</em>, <em>dataproc_hive_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Hive query Job on a Cloud DataProc cluster.</p>
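<p>For example (a minimal sketch; the query and cluster name are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>hive_task = DataProcHiveOperator(
    task_id='dataproc_hive_example',
    query='SHOW DATABASES;',
    cluster_name='cluster-1',
    region='global',
    dag=dag)
</pre></div></div>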
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension).</li>
<li><strong>query_uri</strong> (<em>string</em>) – The uri of a hive script on Cloud Storage.</li>
<li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query.</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This name by default
is the task_id appended with the execution date, but can be templated. The
name will always be appended with a random number to avoid name clashes.</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li>
<li><strong>dataproc_hive_properties</strong> (<em>dict</em>) – Map for the Hive properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_hive_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for
UDFs and libs) and are ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocsparksqloperator">
<span id="id51"></span><h5>DataProcSparkSqlOperator<a class="headerlink" href="#dataprocsparksqloperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkSqlOperator</code><span class="sig-paren">(</span><em>query=None</em>, <em>query_uri=None</em>, <em>variables=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_spark_properties=None</em>, <em>dataproc_spark_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Spark SQL query Job on a Cloud DataProc cluster.</p>
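<p>For example (a minimal sketch; the query and cluster name are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>spark_sql_task = DataProcSparkSqlOperator(
    task_id='dataproc_spark_sql_example',
    query='SHOW DATABASES;',
    cluster_name='cluster-1',
    region='global',
    dag=dag)
</pre></div></div>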
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension). (templated)</li>
<li><strong>query_uri</strong> (<em>string</em>) – The uri of a spark sql script on Cloud Storage.</li>
<li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark SQL properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example:
for UDFs and libs) and are ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocsparkoperator">
<span id="id52"></span><h5>DataProcSparkOperator<a class="headerlink" href="#dataprocsparkoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkOperator</code><span class="sig-paren">(</span><em>main_jar=None</em>, <em>main_class=None</em>, <em>arguments=None</em>, <em>archives=None</em>, <em>files=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_spark_properties=None</em>, <em>dataproc_spark_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Spark Job on a Cloud DataProc cluster.</p>
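<p>For example (a minimal sketch; the main class and jar path are illustrative, since the location of
the Spark examples jar depends on the Dataproc image):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>spark_task = DataProcSparkOperator(
    task_id='dataproc_spark_example',
    main_class='org.apache.spark.examples.SparkPi',
    dataproc_spark_jars=['file:///usr/lib/spark/examples/jars/spark-examples.jar'],
    arguments=['1000'],
    cluster_name='cluster-1',
    region='global',
    dag=dag)
</pre></div></div>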
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or
the main_class, not both together).</li>
<li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both
together).</li>
<li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li>
<li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work
directory. Should be stored in Cloud Storage.</li>
<li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example:
for UDFs and libs) and are ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li>
</ul>
</td>
</tr>
</tbody>
</table>
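<p>A minimal usage sketch (not part of the operator reference): it assumes a pre-existing Dataproc cluster and uses placeholder bucket, jar and class names.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataProcSparkOperator

dag = DAG('dataproc_spark_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

submit_spark = DataProcSparkOperator(
    task_id='submit_spark',
    main_class='org.example.WordCount',                         # hypothetical job class
    dataproc_spark_jars=['gs://my-bucket/jars/wordcount.jar'],  # hypothetical jar in GCS
    arguments=['gs://my-bucket/input/', 'gs://my-bucket/output/'],
    cluster_name='analytics-cluster',                           # assumes this cluster exists
    region='global',
    dag=dag)
</pre></div>
</div>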
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprochadoopoperator">
<span id="id53"></span><h5>DataProcHadoopOperator<a class="headerlink" href="#dataprochadoopoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHadoopOperator</code><span class="sig-paren">(</span><em>main_jar=None</em>, <em>main_class=None</em>, <em>arguments=None</em>, <em>archives=None</em>, <em>files=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_hadoop_properties=None</em>, <em>dataproc_hadoop_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Hadoop Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (Use either this
or <cite>main_class</cite>, not both.)</li>
<li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (Use either this or <cite>main_jar</cite>,
not both.)</li>
<li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li>
<li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work
directory. Should be stored in Cloud Storage.</li>
<li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. By default this
name is the task_id appended with the execution date, but it can
be templated. A random number is always appended to the name to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_hadoop_properties</strong> (<em>dict</em>) – Map of the Hadoop properties. Ideal to put in
default arguments.</li>
<li><strong>dataproc_hadoop_jars</strong> (<em>list</em>) – URIs of jars provisioned in Cloud Storage (for example,
for UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>string</em>) – The region in which the Dataproc cluster is created.</li>
</ul>
</td>
</tr>
</tbody>
</table>
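<p>A minimal usage sketch, assuming a pre-existing Dataproc cluster; the jar location and arguments are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataProcHadoopOperator

dag = DAG('dataproc_hadoop_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

submit_hadoop = DataProcHadoopOperator(
    task_id='submit_hadoop',
    main_jar='gs://my-bucket/jars/hadoop-job.jar',   # hypothetical jar in GCS
    arguments=['wordcount', 'gs://my-bucket/input/', 'gs://my-bucket/output/'],
    cluster_name='analytics-cluster',                # assumes this cluster exists
    region='global',
    dag=dag)
</pre></div>
</div>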
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocpysparkoperator">
<span id="id54"></span><h5>DataProcPySparkOperator<a class="headerlink" href="#dataprocpysparkoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPySparkOperator</code><span class="sig-paren">(</span><em>main</em>, <em>arguments=None</em>, <em>archives=None</em>, <em>pyfiles=None</em>, <em>files=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_pyspark_properties=None</em>, <em>dataproc_pyspark_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a PySpark Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>main</strong> (<em>string</em>) – [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main
Python file to use as the driver. Must be a .py file.</li>
<li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li>
<li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work
directory. Should be stored in Cloud Storage.</li>
<li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li>
<li><strong>pyfiles</strong> (<em>list</em>) – List of Python files to pass to the PySpark framework.
Supported file types: .py, .egg, and .zip</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. By default this
name is the task_id appended with the execution date, but it can
be templated. A random number is always appended to the name to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li>
<li><strong>dataproc_pyspark_properties</strong> (<em>dict</em>) – Map of the PySpark properties. Ideal to put in
default arguments.</li>
<li><strong>dataproc_pyspark_jars</strong> (<em>list</em>) – URIs of jars provisioned in Cloud Storage (for example,
for UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>region</strong> (<em>string</em>) – The region in which the Dataproc cluster is created.</li>
</ul>
</td>
</tr>
</tbody>
</table>
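<p>A minimal usage sketch, assuming a pre-existing Dataproc cluster; the driver file and dependency archive paths are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataProcPySparkOperator

dag = DAG('dataproc_pyspark_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

submit_pyspark = DataProcPySparkOperator(
    task_id='submit_pyspark',
    main='gs://my-bucket/pyspark/main.py',           # hypothetical driver file in GCS
    pyfiles=['gs://my-bucket/pyspark/helpers.zip'],  # hypothetical dependency archive
    arguments=['--date', '{{ ds }}'],
    cluster_name='analytics-cluster',                # assumes this cluster exists
    region='global',
    dag=dag)
</pre></div>
</div>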
<dl class="method">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="dataprocworkflowtemplateinstantiateoperator">
<span id="id55"></span><h5>DataprocWorkflowTemplateInstantiateOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateOperator</code><span class="sig-paren">(</span><em>template_id</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p>
<p>Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait
until the WorkflowTemplate is finished executing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">Please refer to:
<a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>template_id</strong> (<em>string</em>) – The id of the template. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the Google Cloud project in which
the template runs.</li>
<li><strong>region</strong> (<em>string</em>) – leave as ‘global’; might become relevant in the future.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
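<p>A minimal usage sketch, assuming a workflow template has already been created in the given project; the template id and project id are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import (
    DataprocWorkflowTemplateInstantiateOperator)

dag = DAG('dataproc_workflow_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

run_template = DataprocWorkflowTemplateInstantiateOperator(
    task_id='run_template',
    template_id='my-workflow-template',   # hypothetical template id
    project_id='my-gcp-project',          # hypothetical project
    region='global',
    dag=dag)
</pre></div>
</div>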
</dd></dl>
</div>
<div class="section" id="dataprocworkflowtemplateinstantiateinlineoperator">
<span id="id56"></span><h5>DataprocWorkflowTemplateInstantiateInlineOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateinlineoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateInlineOperator</code><span class="sig-paren">(</span><em>template</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p>
<p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will
wait until the WorkflowTemplate is finished executing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">Please refer to:
<a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>template</strong> (<em>map</em>) – The template contents. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the Google Cloud project in which
the template runs.</li>
<li><strong>region</strong> (<em>string</em>) – leave as ‘global’; might become relevant in the future.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
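<p>A minimal usage sketch; the inline <cite>template</cite> dictionary must follow the Dataproc workflowTemplates resource schema linked above, and the fields shown here are illustrative placeholders only.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import (
    DataprocWorkflowTemplateInstantiateInlineOperator)

dag = DAG('dataproc_workflow_inline_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

# Placeholder template body; the real contents must follow the
# workflowTemplates resource schema referenced above.
template = {
    'placement': {'clusterSelector': {'clusterLabels': {'env': 'analytics'}}},
    'jobs': [{'stepId': 'spark_step',
              'sparkJob': {'mainClass': 'org.example.WordCount',
                           'jarFileUris': ['gs://my-bucket/jars/wordcount.jar']}}],
}

run_inline = DataprocWorkflowTemplateInstantiateInlineOperator(
    task_id='run_inline',
    template=template,
    project_id='my-gcp-project',   # hypothetical project
    region='global',
    dag=dag)
</pre></div>
</div>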
</dd></dl>
</div>
</div>
</div>
<div class="section" id="cloud-datastore">
<h3>Cloud Datastore<a class="headerlink" href="#cloud-datastore" title="Permalink to this headline"></a></h3>
<div class="section" id="datastore-operators">
<h4>Datastore Operators<a class="headerlink" href="#datastore-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#datastoreexportoperator"><span class="std std-ref">DatastoreExportOperator</span></a> : Export entities from Google Cloud Datastore to Cloud Storage.</li>
<li><a class="reference internal" href="#datastoreimportoperator"><span class="std std-ref">DatastoreImportOperator</span></a> : Import entities from Cloud Storage to Google Cloud Datastore.</li>
</ul>
<div class="section" id="datastoreexportoperator">
<span id="id57"></span><h5>DatastoreExportOperator<a class="headerlink" href="#datastoreexportoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_export_operator.</code><code class="descname">DatastoreExportOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>datastore_conn_id='google_cloud_default'</em>, <em>cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>entity_filter=None</em>, <em>labels=None</em>, <em>polling_interval_in_seconds=10</em>, <em>overwrite_existing=False</em>, <em>xcom_push=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Export entities from Google Cloud Datastore to Cloud Storage</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – name of the Cloud Storage bucket in which to back up the data</li>
<li><strong>namespace</strong> (<em>str</em>) – optional namespace path in the specified Cloud Storage bucket
in which to back up the data. If this namespace does not exist in GCS, it will be created.</li>
<li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the Datastore connection id to use</li>
<li><strong>cloud_storage_conn_id</strong> (<em>string</em>) – the name of the Cloud Storage connection id used to
write the backup</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in the
export, refer to
<a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li>
<li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li>
<li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for
execution status again</li>
<li><strong>overwrite_existing</strong> (<em>bool</em>) – if the storage bucket + namespace is not empty, it will be
emptied prior to the export. This enables overwriting existing backups.</li>
<li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li>
</ul>
</td>
</tr>
</tbody>
</table>
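<p>A minimal usage sketch; the bucket and namespace names are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.datastore_export_operator import DatastoreExportOperator

dag = DAG('datastore_export_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

export_entities = DatastoreExportOperator(
    task_id='export_entities',
    bucket='my-datastore-backups',   # hypothetical GCS bucket
    namespace='nightly_backup',      # hypothetical namespace path in the bucket
    overwrite_existing=True,
    dag=dag)
</pre></div>
</div>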
<dl class="method">
<dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="datastoreimportoperator">
<span id="id58"></span><h5>DatastoreImportOperator<a class="headerlink" href="#datastoreimportoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_import_operator.</code><code class="descname">DatastoreImportOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em>, <em>datastore_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>polling_interval_in_seconds=10</em>, <em>xcom_push=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Import entities from Cloud Storage to Google Cloud Datastore</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – container in Cloud Storage to store data</li>
<li><strong>file</strong> (<em>string</em>) – path of the backup metadata file in the specified Cloud Storage bucket.
It should have the extension .overall_export_metadata</li>
<li><strong>namespace</strong> (<em>str</em>) – optional namespace of the backup metadata file in
the specified Cloud Storage bucket.</li>
<li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in
the export, refer to
<a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li>
<li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li>
<li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the connection id to use</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for
execution status again</li>
<li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li>
</ul>
</td>
</tr>
</tbody>
</table>
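<p>A minimal usage sketch; the bucket and backup metadata file path are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.datastore_import_operator import DatastoreImportOperator

dag = DAG('datastore_import_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

import_entities = DatastoreImportOperator(
    task_id='import_entities',
    bucket='my-datastore-backups',                              # hypothetical GCS bucket
    file='nightly_backup/2018-01-01.overall_export_metadata',   # hypothetical metadata file
    dag=dag)
</pre></div>
</div>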
<dl class="method">
<dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="datastorehook">
<h4>DatastoreHook<a class="headerlink" href="#datastorehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.datastore_hook.</code><code class="descname">DatastoreHook</code><span class="sig-paren">(</span><em>datastore_conn_id='google_cloud_datastore_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Interact with Google Cloud Datastore. This hook uses the Google Cloud Platform
connection.</p>
<p>This object is not thread safe. If you want to make multiple requests
simultaneously, you will need to create a hook per thread.</p>
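<p>A minimal sketch of calling the hook from task code (for example inside a <cite>PythonOperator</cite> callable); the entity kind and query body shown follow the Datastore runQuery REST format and are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.datastore_hook import DatastoreHook


def count_users(**context):
    # Create a fresh hook per call; the hook is not thread safe.
    hook = DatastoreHook(datastore_conn_id='google_cloud_datastore_default')
    # The body follows the Datastore runQuery REST request format;
    # 'User' is a placeholder entity kind.
    batch = hook.run_query({'query': {'kind': [{'name': 'User'}]}})
    return len(batch.get('entityResults', []))
</pre></div>
</div>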
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids">
<code class="descname">allocate_ids</code><span class="sig-paren">(</span><em>partialKeys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.allocate_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids" title="Permalink to this definition"></a></dt>
<dd><p>Allocate IDs for incomplete keys.
see <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>partialKeys</strong> – a list of partial keys</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a list of full keys.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction">
<code class="descname">begin_transaction</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.begin_transaction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction" title="Permalink to this definition"></a></dt>
<dd><p>Get a new transaction handle</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">a transaction handle</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.commit">
<code class="descname">commit</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.commit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.commit" title="Permalink to this definition"></a></dt>
<dd><p>Commit a transaction, optionally creating, deleting or modifying some entities.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the commit request</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the response body of the commit request</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation">
<code class="descname">delete_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.delete_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the long-running operation</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket">
<code class="descname">export_to_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.export_to_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Export entities from Cloud Datastore to Cloud Storage for backup</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><em>version='v1'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google Cloud Datastore service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation">
<code class="descname">get_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation" title="Permalink to this definition"></a></dt>
<dd><p>Gets the latest state of a long-running operation</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket">
<code class="descname">import_from_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.import_from_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Import a backup from Cloud Storage to Cloud Datastore</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup">
<code class="descname">lookup</code><span class="sig-paren">(</span><em>keys</em>, <em>read_consistency=None</em>, <em>transaction=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.lookup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup" title="Permalink to this definition"></a></dt>
<dd><p>Look up some entities by key</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>keys</strong> – the keys to lookup</li>
<li><strong>read_consistency</strong> – the read consistency to use: default, strong or eventual.
Cannot be used with a transaction.</li>
<li><strong>transaction</strong> – the transaction to use, if any.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">the response body of the lookup request.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done">
<code class="descname">poll_operation_until_done</code><span class="sig-paren">(</span><em>name</em>, <em>polling_interval_in_seconds</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.poll_operation_until_done"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done" title="Permalink to this definition"></a></dt>
<dd><p>Poll backup operation state until it’s completed</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback">
<code class="descname">rollback</code><span class="sig-paren">(</span><em>transaction</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.rollback"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback" title="Permalink to this definition"></a></dt>
<dd><p>Roll back a transaction</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>transaction</strong> – the transaction to roll back</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query">
<code class="descname">run_query</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query" title="Permalink to this definition"></a></dt>
<dd><p>Run a query for entities.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the query request</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the batch of query results.</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-ml-engine">
<h3>Cloud ML Engine<a class="headerlink" href="#cloud-ml-engine" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-ml-engine-operators">
<h4>Cloud ML Engine Operators<a class="headerlink" href="#cloud-ml-engine-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#mlenginebatchpredictionoperator"><span class="std std-ref">MLEngineBatchPredictionOperator</span></a> : Start a Cloud ML Engine batch prediction job.</li>
<li><a class="reference internal" href="#mlenginemodeloperator"><span class="std std-ref">MLEngineModelOperator</span></a> : Manages a Cloud ML Engine model.</li>
<li><a class="reference internal" href="#mlenginetrainingoperator"><span class="std std-ref">MLEngineTrainingOperator</span></a> : Start a Cloud ML Engine training job.</li>
<li><a class="reference internal" href="#mlengineversionoperator"><span class="std std-ref">MLEngineVersionOperator</span></a> : Manages a Cloud ML Engine model version.</li>
</ul>
<div class="section" id="mlenginebatchpredictionoperator">
<span id="id59"></span><h5>MLEngineBatchPredictionOperator<a class="headerlink" href="#mlenginebatchpredictionoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineBatchPredictionOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>job_id</em>, <em>region</em>, <em>data_format</em>, <em>input_paths</em>, <em>output_path</em>, <em>model_name=None</em>, <em>version_name=None</em>, <em>uri=None</em>, <em>max_worker_count=None</em>, <em>runtime_version=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Google Cloud ML Engine prediction job.</p>
<p>NOTE: For the model origin, users should choose exactly one of the
three options below:</p>
<ol class="arabic simple">
<li>Populate the ‘uri’ field only, which should be a GCS location that
points to a TensorFlow SavedModel directory.</li>
<li>Populate the ‘model_name’ field only, which refers to an existing
model; the default version of the model will be used.</li>
<li>Populate both the ‘model_name’ and ‘version_name’ fields, which
refers to a specific version of a specific model.</li>
</ol>
<p>In options 2 and 3, both model and version name should contain the
minimal identifier. For instance, call</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MLEngineBatchPredictionOperator</span><span class="p">(</span>
<span class="o">...</span><span class="p">,</span>
<span class="n">model_name</span><span class="o">=</span><span class="s1">&#39;my_model&#39;</span><span class="p">,</span>
<span class="n">version_name</span><span class="o">=</span><span class="s1">&#39;my_version&#39;</span><span class="p">,</span>
<span class="o">...</span><span class="p">)</span>
</pre></div>
</div>
<p>if the desired model version is
“projects/my_project/models/my_model/versions/my_version”.</p>
<p>See <a class="reference external" href="https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs">https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs</a>
for further documentation on the parameters.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name where the
prediction job is submitted. (templated)</li>
<li><strong>job_id</strong> (<em>string</em>) – A unique id for the prediction job on Google Cloud
ML Engine. (templated)</li>
<li><strong>data_format</strong> (<em>string</em>) – The format of the input data.
It will default to ‘DATA_FORMAT_UNSPECIFIED’ if it is not provided
or is not one of [“TEXT”, “TF_RECORD”, “TF_RECORD_GZIP”].</li>
<li><strong>input_paths</strong> (<em>list of string</em>) – A list of GCS paths of input data for batch
prediction. The wildcard operator <code class="docutils literal notranslate"><span class="pre">*</span></code> is accepted, but only at the end of a path. (templated)</li>
<li><strong>output_path</strong> (<em>string</em>) – The GCS path where the prediction results are
written to. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the
prediction job in. (templated)</li>
<li><strong>model_name</strong> (<em>string</em>) – The Google Cloud ML Engine model to use for prediction.
If version_name is not provided, the default version of this
model will be used.
Should not be None if version_name is provided.
Should be None if uri is provided. (templated)</li>
<li><strong>version_name</strong> (<em>string</em>) – The Google Cloud ML Engine model version to use for
prediction.
Should be None if uri is provided. (templated)</li>
<li><strong>uri</strong> (<em>string</em>) – The GCS path of the saved model to use for prediction.
Should be None if model_name is provided.
It should be a GCS path pointing to a tensorflow SavedModel. (templated)</li>
<li><strong>max_worker_count</strong> (<em>int</em>) – The maximum number of workers to be used
for parallel processing. Defaults to 10 if not specified.</li>
<li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML Engine runtime version to use
for batch prediction.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID used for connection to Google
Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must
have domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Raises:</dt>
<dd><code class="docutils literal notranslate"><span class="pre">ValueError</span></code>: if a unique model/version origin cannot be determined.</dd>
</dl>
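<p>A minimal usage sketch using option 2 above (an existing model with its default version); the project, bucket and model names are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineBatchPredictionOperator

dag = DAG('mlengine_batch_prediction_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

batch_predict = MLEngineBatchPredictionOperator(
    task_id='batch_predict',
    project_id='my-gcp-project',                        # hypothetical project
    job_id='prediction_{{ ds_nodash }}',                # templated; must be unique per run
    region='us-central1',
    data_format='TEXT',
    input_paths=['gs://my-bucket/prediction/input*'],   # wildcard only at the end
    output_path='gs://my-bucket/prediction/output/',
    model_name='my_model',                              # uses the model's default version
    dag=dag)
</pre></div>
</div>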
<dl class="method">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="mlenginemodeloperator">
<span id="id62"></span><h5>MLEngineModelOperator<a class="headerlink" href="#mlenginemodeloperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineModelOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em>, <em>operation='create'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator for managing a Google Cloud ML Engine model.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine
model belongs. (templated)</li>
<li><strong>model</strong> (<em>dict</em>) – <p>A dictionary containing the information about the model.
If the <cite>operation</cite> is <cite>create</cite>, then the <cite>model</cite> parameter should
contain all the information about this model such as <cite>name</cite>.</p>
<p>If the <cite>operation</cite> is <cite>get</cite>, the <cite>model</cite> parameter
should contain the <cite>name</cite> of the model.</p>
</li>
<li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new model as provided by the <cite>model</cite> parameter.</li>
<li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets a particular model where the name is specified in <cite>model</cite>.</li>
</ul>
</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
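<p>A minimal usage sketch creating a model; the project id and model name are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineModelOperator

dag = DAG('mlengine_model_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

create_model = MLEngineModelOperator(
    task_id='create_model',
    project_id='my-gcp-project',   # hypothetical project
    model={'name': 'my_model'},    # for 'create', the dict describes the new model
    operation='create',
    dag=dag)
</pre></div>
</div>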
<dl class="method">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="mlenginetrainingoperator">
<span id="id63"></span><h5>MLEngineTrainingOperator<a class="headerlink" href="#mlenginetrainingoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineTrainingOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>job_id</em>, <em>package_uris</em>, <em>training_python_module</em>, <em>training_args</em>, <em>region</em>, <em>scale_tier=None</em>, <em>runtime_version=None</em>, <em>python_version=None</em>, <em>job_dir=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>mode='PRODUCTION'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator for launching an MLEngine training job.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name within which MLEngine
training job should run (templated).</li>
<li><strong>job_id</strong> (<em>string</em>) – A unique templated id for the submitted Google MLEngine
training job. (templated)</li>
<li><strong>package_uris</strong> (<em>string</em>) – A list of package locations for MLEngine training job,
which should include the main training program + any additional
dependencies. (templated)</li>
<li><strong>training_python_module</strong> (<em>string</em>) – The Python module name to run within MLEngine
training job after installing ‘package_uris’ packages. (templated)</li>
<li><strong>training_args</strong> (<em>string</em>) – A list of templated command line arguments to pass to
the MLEngine training program. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the MLEngine training
job in (templated).</li>
<li><strong>scale_tier</strong> (<em>string</em>) – Resource tier for MLEngine training job. (templated)</li>
<li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML runtime version to use for
training. (templated)</li>
<li><strong>python_version</strong> (<em>string</em>) – The version of Python used in training. (templated)</li>
<li><strong>job_dir</strong> (<em>string</em>) – A Google Cloud Storage path in which to store training
outputs and other data needed for training. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>mode</strong> (<em>string</em>) – Can be one of ‘DRY_RUN’/’CLOUD’. In ‘DRY_RUN’ mode, no real
training job will be launched, but the MLEngine training job request
will be printed out. In ‘CLOUD’ mode, a real MLEngine training job
creation request will be issued.</li>
</ul>
</td>
</tr>
</tbody>
</table>
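<p>A minimal usage sketch; the trainer package URI, module name and data paths are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineTrainingOperator

dag = DAG('mlengine_training_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

train_model = MLEngineTrainingOperator(
    task_id='train_model',
    project_id='my-gcp-project',                                  # hypothetical project
    job_id='training_{{ ds_nodash }}',                            # templated; unique per run
    package_uris=['gs://my-bucket/trainer/trainer-0.1.tar.gz'],   # hypothetical package
    training_python_module='trainer.task',                        # hypothetical module
    training_args=['--train-files', 'gs://my-bucket/data/train.csv'],
    region='us-central1',
    scale_tier='BASIC',
    dag=dag)
</pre></div>
</div>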
<dl class="method">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="mlengineversionoperator">
<span id="id64"></span><h5>MLEngineVersionOperator<a class="headerlink" href="#mlengineversionoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineVersionOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name=None</em>, <em>version=None</em>, <em>operation='create'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator for managing a Google Cloud ML Engine version.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine
model belongs.</li>
<li><strong>model_name</strong> (<em>string</em>) – The name of the Google Cloud ML Engine model that the version
belongs to. (templated)</li>
<li><strong>version_name</strong> (<em>string</em>) – A name to use for the version being operated upon.
If not None and the <cite>version</cite> argument is None or does not have a value for
the <cite>name</cite> key, then this will be populated in the payload for the
<cite>name</cite> key. (templated)</li>
<li><strong>version</strong> (<em>dict</em>) – A dictionary containing the information about the version.
If the <cite>operation</cite> is <cite>create</cite>, <cite>version</cite> should contain all the
information about this version, such as <cite>name</cite> and <cite>deploymentUri</cite>.
If the <cite>operation</cite> is <cite>get</cite> or <cite>delete</cite>, the <cite>version</cite> parameter
should contain the <cite>name</cite> of the version.
If it is None, the only <cite>operation</cite> possible would be <cite>list</cite>. (templated)</li>
<li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new version in the model specified by <cite>model_name</cite>,
in which case the <cite>version</cite> parameter should contain all the
information to create that version
(e.g. <cite>name</cite>, <cite>deploymentUrl</cite>).</li>
<li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets full information of a particular version in the model
specified by <cite>model_name</cite>.
The name of the version should be specified in the <cite>version</cite>
parameter.</li>
<li><code class="docutils literal notranslate"><span class="pre">list</span></code>: Lists all available versions of the model specified
by <cite>model_name</cite>.</li>
<li><code class="docutils literal notranslate"><span class="pre">delete</span></code>: Deletes the version specified in <cite>version</cite> parameter from the
model specified by <cite>model_name</cite>.
The name of the version should be specified in the <cite>version</cite>
parameter.</li>
</ul>
</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
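<p>A minimal sketch (the project id, model name and version values are illustrative, not part of the
documented API) of creating a new version; the <cite>version</cite> dictionary fields follow the Cloud ML Engine
Versions API:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>create_version = MLEngineVersionOperator(
    task_id='create_version',
    project_id='my-gcp-project',        # hypothetical project id
    model_name='my_model',              # hypothetical model name
    version={
        'name': 'v1',
        'deploymentUri': 'gs://my-bucket/export/',  # illustrative GCS path
        'runtimeVersion': '1.8',
    },
    operation='create',
)
</pre></div>
</div>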
<dl class="method">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-ml-engine-hook">
<h4>Cloud ML Engine Hook<a class="headerlink" href="#cloud-ml-engine-hook" title="Permalink to this headline"></a></h4>
<div class="section" id="mlenginehook">
<span id="id65"></span><h5>MLEngineHook<a class="headerlink" href="#mlenginehook" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_mlengine_hook.</code><code class="descname">MLEngineHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job">
<code class="descname">create_job</code><span class="sig-paren">(</span><em>project_id</em>, <em>job</em>, <em>use_existing_job_fn=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job" title="Permalink to this definition"></a></dt>
<dd><p>Launches an MLEngine job and waits for it to reach a terminal state.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project id within which MLEngine
job will be launched.</li>
<li><strong>job</strong> (<em>dict</em>) – <p>MLEngine Job object that should be provided to the MLEngine
API, such as:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s1">&#39;jobId&#39;</span><span class="p">:</span> <span class="s1">&#39;my_job_id&#39;</span><span class="p">,</span>
<span class="s1">&#39;trainingInput&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;scaleTier&#39;</span><span class="p">:</span> <span class="s1">&#39;STANDARD_1&#39;</span><span class="p">,</span>
<span class="o">...</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</li>
<li><strong>use_existing_job_fn</strong> (<em>function</em>) – In case an MLEngine job with the same
job_id already exists, this function (if provided) decides whether
we should reuse the existing job, continue waiting for it to finish
and return the job object. It should accept an MLEngine job
object and return a boolean value indicating whether it is OK to
reuse the existing job. If ‘use_existing_job_fn’ is not provided,
the existing MLEngine job is reused by default.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The MLEngine job object if the job successfully reach a
terminal state (which might be FAILED or CANCELLED state).</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
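<p>A minimal sketch (the connection id, project id and training input are illustrative) of supplying
<cite>use_existing_job_fn</cite> so that a job with the same <cite>jobId</cite> is only reused when its training input
matches:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.gcp_mlengine_hook import MLEngineHook

training_input = {'scaleTier': 'STANDARD_1'}  # minimal, illustrative training input

def reuse_if_same_input(existing_job):
    # Return True to reuse the existing job (and wait for it to finish),
    # False to reject it.
    return existing_job.get('trainingInput') == training_input

hook = MLEngineHook(gcp_conn_id='google_cloud_default')
job = hook.create_job(
    project_id='my-gcp-project',   # hypothetical project id
    job={'jobId': 'my_job_id', 'trainingInput': training_input},
    use_existing_job_fn=reuse_if_same_input,
)
</pre></div>
</div>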
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model">
<code class="descname">create_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model" title="Permalink to this definition"></a></dt>
<dd><p>Create a Model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version">
<code class="descname">create_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_spec</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version" title="Permalink to this definition"></a></dt>
<dd><p>Creates the Version on Google Cloud ML Engine.</p>
<p>Returns the operation if the version was created successfully and
raises an error otherwise.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version">
<code class="descname">delete_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.delete_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the given version of a model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google MLEngine service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model">
<code class="descname">get_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model" title="Permalink to this definition"></a></dt>
<dd><p>Gets a Model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions">
<code class="descname">list_versions</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.list_versions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions" title="Permalink to this definition"></a></dt>
<dd><p>Lists all available versions of a model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version">
<code class="descname">set_default_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.set_default_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version" title="Permalink to this definition"></a></dt>
<dd><p>Sets a version to be the default. Blocks until finished.</p>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<div class="section" id="cloud-storage">
<h3>Cloud Storage<a class="headerlink" href="#cloud-storage" title="Permalink to this headline"></a></h3>
<div class="section" id="storage-operators">
<h4>Storage Operators<a class="headerlink" href="#storage-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#filetogooglecloudstorageoperator"><span class="std std-ref">FileToGoogleCloudStorageOperator</span></a> : Uploads a file to Google Cloud Storage.</li>
<li><a class="reference internal" href="#googlecloudstoragecreatebucketoperator"><span class="std std-ref">GoogleCloudStorageCreateBucketOperator</span></a> : Creates a new cloud storage bucket.</li>
<li><a class="reference internal" href="#googlecloudstoragelistoperator"><span class="std std-ref">GoogleCloudStorageListOperator</span></a> : List all objects from the bucket with the give string prefix and delimiter in name.</li>
<li><a class="reference internal" href="#googlecloudstoragedownloadoperator"><span class="std std-ref">GoogleCloudStorageDownloadOperator</span></a> : Downloads a file from Google Cloud Storage.</li>
<li><a class="reference internal" href="#googlecloudstoragetobigqueryoperator"><span class="std std-ref">GoogleCloudStorageToBigQueryOperator</span></a> : Loads files from Google cloud storage into BigQuery.</li>
<li><a class="reference internal" href="#googlecloudstoragetogooglecloudstorageoperator"><span class="std std-ref">GoogleCloudStorageToGoogleCloudStorageOperator</span></a> : Copies objects from a bucket to another, with renaming if requested.</li>
</ul>
<div class="section" id="filetogooglecloudstorageoperator">
<span id="id66"></span><h5>FileToGoogleCloudStorageOperator<a class="headerlink" href="#filetogooglecloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_gcs.</code><code class="descname">FileToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>src</em>, <em>dst</em>, <em>bucket</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>mime_type='application/octet-stream'</em>, <em>delegate_to=None</em>, <em>gzip=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Uploads a file to Google Cloud Storage.
Optionally can compress the file for upload.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>src</strong> (<em>string</em>) – Path to the local file. (templated)</li>
<li><strong>dst</strong> (<em>string</em>) – Destination path within the specified bucket. (templated)</li>
<li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to. (templated)</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The Airflow connection ID to upload with</li>
<li><strong>mime_type</strong> (<em>string</em>) – The mime-type string</li>
<li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any</li>
<li><strong>gzip</strong> (<em>bool</em>) – Allows for file to be compressed and uploaded as gzip</li>
</ul>
</td>
</tr>
</tbody>
</table>
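<p>A minimal sketch (the paths, bucket and connection id are illustrative) of uploading a local file
and compressing it on the way:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>upload_file = FileToGoogleCloudStorageOperator(
    task_id='upload_file',
    src='/tmp/reports/report.csv',          # hypothetical local path
    dst='reports/report.csv',               # destination path within the bucket
    bucket='my-reports-bucket',             # hypothetical bucket
    google_cloud_storage_conn_id='google_cloud_default',
    gzip=True,
)
</pre></div>
</div>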
<dl class="method">
<dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>Uploads the file to Google cloud storage</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragecreatebucketoperator">
<span id="id67"></span><h5>GoogleCloudStorageCreateBucketOperator<a class="headerlink" href="#googlecloudstoragecreatebucketoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_operator.</code><code class="descname">GoogleCloudStorageCreateBucketOperator</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new bucket. Google Cloud Storage uses a flat namespace,
so you can’t create a bucket with a name that is already in use.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more information, see Bucket Naming Guidelines:
<a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket. (templated)</li>
<li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored
and determines the SLA and the cost of storage (templated). Values include</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li>
</ul>
<p>If this value is not specified when the bucket is
created, it will default to STANDARD.</p>
</li>
<li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. (templated)
Object data for objects in the bucket resides in physical storage
within this region. Defaults to US.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p>
</div>
</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project. (templated)</li>
<li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must
have domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following Operator would create a new bucket <code class="docutils literal notranslate"><span class="pre">test-bucket</span></code>
with <code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code> storage class in <code class="docutils literal notranslate"><span class="pre">EU</span></code> region</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateBucket</span> <span class="o">=</span> <span class="n">GoogleCloudStorageCreateBucketOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;CreateNewBucket&#39;</span><span class="p">,</span>
<span class="n">bucket_name</span><span class="o">=</span><span class="s1">&#39;test-bucket&#39;</span><span class="p">,</span>
<span class="n">storage_class</span><span class="o">=</span><span class="s1">&#39;MULTI_REGIONAL&#39;</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="s1">&#39;EU&#39;</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;env&#39;</span><span class="p">:</span> <span class="s1">&#39;dev&#39;</span><span class="p">,</span> <span class="s1">&#39;team&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">},</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
<dl class="method">
<dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragedownloadoperator">
<span id="id68"></span><h5>GoogleCloudStorageDownloadOperator<a class="headerlink" href="#googlecloudstoragedownloadoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_download_operator.</code><code class="descname">GoogleCloudStorageDownloadOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em>, <em>store_to_xcom_key=None</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Downloads a file from Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is. (templated)</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to download in the Google cloud
storage bucket. (templated)</li>
<li><strong>filename</strong> (<em>string</em>) – The file path on the local file system (where the
operator is being executed) that the file should be downloaded to. (templated)
If no filename passed, the downloaded data will not be stored on the local file
system.</li>
<li><strong>store_to_xcom_key</strong> (<em>string</em>) – If this param is set, the operator will push
the contents of the downloaded file to XCom with the key set in this
parameter. If not set, the downloaded data will not be pushed to XCom. (templated)</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
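<p>A minimal sketch (the bucket, object and local path are illustrative) of downloading an object to
the local file system:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>download_file = GoogleCloudStorageDownloadOperator(
    task_id='download_file',
    bucket='my-reports-bucket',             # hypothetical bucket
    object='reports/report.csv',            # hypothetical object name
    filename='/tmp/report.csv',             # where to store the file locally
    google_cloud_storage_conn_id='google_cloud_default',
)
</pre></div>
</div>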
<dl class="method">
<dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragelistoperator">
<span id="id69"></span><h5>GoogleCloudStorageListOperator<a class="headerlink" href="#googlecloudstoragelistoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_list_operator.</code><code class="descname">GoogleCloudStorageListOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>prefix=None</em>, <em>delimiter=None</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Lists all objects from the bucket with the given string prefix and delimiter in the name.</p>
<p>This operator returns a python list with the names of objects, which can be used by
<cite>xcom</cite> in the downstream task.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket to find the objects. (templated)</li>
<li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with
this prefix. (templated)</li>
<li><strong>delimiter</strong> (<em>string</em>) – The delimiter by which you want to filter the objects. (templated)
For example, to list the CSV files in a directory in GCS you would use
delimiter=’.csv’.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following Operator would list all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code>
folder in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">GCS_Files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageListOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;GCS_Files&#39;</span><span class="p">,</span>
<span class="n">bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">prefix</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/&#39;</span><span class="p">,</span>
<span class="n">delimiter</span><span class="o">=</span><span class="s1">&#39;.avro&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
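<p>A minimal sketch (the downstream task and its command are illustrative) of how a downstream task
could consume the returned list through XCom, building on the <code class="docutils literal notranslate"><span class="pre">GCS_Files</span></code> task from the
example above:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.operators.bash_operator import BashOperator

process_files = BashOperator(
    task_id='process_files',
    # Pull the list of object names pushed by the GCS_Files task.
    bash_command="echo {{ task_instance.xcom_pull(task_ids='GCS_Files') }}",
)
GCS_Files >> process_files
</pre></div>
</div>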
<dl class="method">
<dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragetobigqueryoperator">
<span id="id70"></span><h5>GoogleCloudStorageToBigQueryOperator<a class="headerlink" href="#googlecloudstoragetobigqueryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_bq.</code><code class="descname">GoogleCloudStorageToBigQueryOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>source_objects</em>, <em>destination_project_dataset_table</em>, <em>schema_fields=None</em>, <em>schema_object=None</em>, <em>source_format='CSV'</em>, <em>compression='NONE'</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>skip_leading_rows=0</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>field_delimiter='</em>, <em>'</em>, <em>max_bad_records=0</em>, <em>quote_character=None</em>, <em>ignore_unknown_values=False</em>, <em>allow_quoted_newlines=False</em>, <em>allow_jagged_rows=False</em>, <em>max_id_key=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>schema_update_options=()</em>, <em>src_fmt_configs={}</em>, <em>external_table=False</em>, <em>time_partitioning={}</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Loads files from Google cloud storage into BigQuery.</p>
<p>The schema to be used for the BigQuery table may be specified in one of
two ways. You may either directly pass the schema fields in, or you may
point the operator to a Google cloud storage object name. The object in
Google cloud storage must be a JSON file with the schema fields in it.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to load from. (templated)</li>
<li><strong>source_objects</strong> – List of Google cloud storage URIs to load from. (templated)
If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into. If &lt;project&gt; is not included,
project will be the project defined in the connection json. (templated)</li>
<li><strong>schema_fields</strong> (<em>list</em>) – If set, the schema field list as defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load</a>
Should not be set when source_format is ‘DATASTORE_BACKUP’.</li>
<li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that
contains the schema for the table. (templated)</li>
<li><strong>source_format</strong> (<em>string</em>) – File format to export.</li>
<li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source.
Possible values include GZIP and NONE.
The default value is NONE.
This setting is ignored for Google Cloud Bigtable,
Google Cloud Datastore backups and Avro formats.</li>
<li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li>
<li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li>
<li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li>
<li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when loading from a CSV.</li>
<li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can
ignore when running the job.</li>
<li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li>
<li><strong>ignore_unknown_values</strong> (<em>bool</em>) – [Optional] Indicates if BigQuery should allow
extra values that are not represented in the table schema.
If true, the extra values are ignored. If false, records with extra columns
are treated as bad records, and if there are too many bad records, an
invalid error is returned in the job result.</li>
<li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li>
<li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns.
The missing values are treated as nulls. If false, records with missing trailing
columns are treated as bad records, and if there are too many bad records, an
invalid error is returned in the job result. Only applicable to CSV, ignored
for other formats.</li>
<li><strong>max_id_key</strong> (<em>string</em>) – If set, the name of a column in the BigQuery table
that’s to be loaded. This will be used to select the MAX value from
BigQuery after the load occurs. The results will be returned by the
execute() command, which in turn gets stored in XCom for future
operators to use. This can be helpful with incremental loads; during
future executions, you can pick up from the max ID.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google
cloud storage hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to
work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>schema_update_options</strong> (<em>list</em>) – Allows the schema of the destination
table to be updated as a side effect of the load job.</li>
<li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li>
<li><strong>external_table</strong> (<em>bool</em>) – Flag to specify if the destination table should be
a BigQuery external table. Default Value is False.</li>
<li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e.
partition by field, type and expiration as per API specifications.
Note that ‘field’ is not available in concurrency with
dataset.table$partition.</li>
</ul>
</td>
</tr>
</tbody>
</table>
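<p>A minimal sketch (the bucket, objects, destination table and schema are illustrative) of loading
CSV files into a BigQuery table:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>load_csv = GoogleCloudStorageToBigQueryOperator(
    task_id='gcs_to_bq',
    bucket='my-data-bucket',                              # hypothetical bucket
    source_objects=['sales/sales-2017/*.csv'],            # hypothetical objects
    destination_project_dataset_table='my_dataset.sales_2017',
    schema_fields=[
        {'name': 'date', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'amount', 'type': 'FLOAT', 'mode': 'NULLABLE'},
    ],
    source_format='CSV',
    skip_leading_rows=1,
    write_disposition='WRITE_TRUNCATE',
    bigquery_conn_id='bigquery_default',
    google_cloud_storage_conn_id='google_cloud_default',
)
</pre></div>
</div>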
<dl class="method">
<dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragetogooglecloudstorageoperator">
<span id="id71"></span><h5>GoogleCloudStorageToGoogleCloudStorageOperator<a class="headerlink" href="#googlecloudstoragetogooglecloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em>, <em>move_object=False</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies objects from a bucket to another, with renaming if requested.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>string</em>) – The source Google cloud storage bucket where the
object is. (templated)</li>
<li><strong>source_object</strong> (<em>string</em>) – <p>The source name of the object to copy in the Google cloud
storage bucket. (templated)
If wildcards are used in this argument:</p>
<blockquote>
<div>You can use only one wildcard for objects (filenames) within your
bucket. The wildcard can appear inside the object name or at the
end of the object name. Appending a wildcard to the bucket name is
unsupported.</div></blockquote>
</li>
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination Google cloud storage bucket
where the object should be. (templated)</li>
<li><strong>destination_object</strong> (<em>string</em>) – The destination name of the object in the
destination Google cloud storage bucket. (templated)
If a wildcard is supplied in the source_object argument, this is the
prefix that will be prepended to the final destination objects’ paths.
Note that the source path’s part before the wildcard will be removed;
if it needs to be retained it should be appended to destination_object.
For example, with prefix <code class="docutils literal notranslate"><span class="pre">foo/*</span></code> and destination_object <code class="docutils literal notranslate"><span class="pre">blah/</span></code>, the
file <code class="docutils literal notranslate"><span class="pre">foo/baz</span></code> will be copied to <code class="docutils literal notranslate"><span class="pre">blah/baz</span></code>; to retain the prefix write
the destination_object as e.g. <code class="docutils literal notranslate"><span class="pre">blah/foo</span></code>, in which case the copied file
will be named <code class="docutils literal notranslate"><span class="pre">blah/foo/baz</span></code>.</li>
<li><strong>move_object</strong> (<em>bool</em>) – When move_object is True, the object is moved instead
of copied to the new location. This is the equivalent of a mv command
as opposed to a cp command.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Examples</strong>:</dt>
<dd><p class="first">The following Operator would copy a single file named
<code class="docutils literal notranslate"><span class="pre">sales/sales-2017/january.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the file named
<code class="docutils literal notranslate"><span class="pre">copied_sales/2017/january-backup.avro`</span> <span class="pre">in</span> <span class="pre">the</span> <span class="pre">``data_backup</span></code> bucket</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;copy_single_file&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/january.avro&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;data_backup&#39;</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">=</span><span class="s1">&#39;copied_sales/2017/january-backup.avro&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
<p>The following Operator would copy all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code>
folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the
<code class="docutils literal notranslate"><span class="pre">copied_sales/2017</span></code> folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;copy_files&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/*.avro&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;data_backup&#39;</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">=</span><span class="s1">&#39;copied_sales/2017/&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
<p>The following Operator would move all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code>
folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the
same folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket, deleting the original files in the
process.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">move_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;move_files&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/*.avro&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;data_backup&#39;</span><span class="p">,</span>
<span class="n">move_object</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
<dl class="method">
<dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used as when rendering jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="googlecloudstoragehook">
<h4>GoogleCloudStorageHook<a class="headerlink" href="#googlecloudstoragehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcs_hook.</code><code class="descname">GoogleCloudStorageHook</code><span class="sig-paren">(</span><em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Interact with Google Cloud Storage. This hook uses the Google Cloud Platform
connection.</p>
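<p>A minimal sketch (the bucket, object and local path are illustrative) of using the hook directly,
e.g. from a <cite>PythonOperator</cite> callable:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')
if hook.exists(bucket='my-bucket', object='data/file.csv'):   # hypothetical names
    hook.download(bucket='my-bucket', object='data/file.csv',
                  filename='/tmp/file.csv')
</pre></div>
</div>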
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy">
<code class="descname">copy</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.copy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy" title="Permalink to this definition"></a></dt>
<dd><p>Copies an object from one bucket to another, with renaming if requested.</p>
<p>destination_bucket or destination_object can be omitted, in which case the
source bucket/object is used, but they cannot both be omitted.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li>
<li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li>
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket the object is copied to.
Can be omitted; then the same bucket is used.</li>
<li><strong>destination_object</strong> – The (renamed) path of the object if given.
Can be omitted; then the same name is used.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket">
<code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new bucket. Google Cloud Storage uses a flat namespace, so
you can’t create a bucket with a name that is already in use.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more information, see Bucket Naming Guidelines:
<a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket.</li>
<li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored
and determines the SLA and the cost of storage. Values include</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li>
</ul>
<p>If this value is not specified when the bucket is
created, it will default to STANDARD.</p>
</li>
<li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket.
Object data for objects in the bucket resides in physical storage
within this region. Defaults to US.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p>
</div>
</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project.</li>
<li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">If successful, it returns the <code class="docutils literal notranslate"><span class="pre">id</span></code> of the bucket.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete">
<code class="descname">delete</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>generation=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.delete"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete" title="Permalink to this definition"></a></dt>
<dd><p>Deletes an object if versioning is not enabled for the bucket, or if the
generation parameter is used.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket</strong> (<em>string</em>) – name of the bucket, where the object resides</li>
<li><strong>object</strong> (<em>string</em>) – name of the object to delete</li>
<li><strong>generation</strong> (<em>string</em>) – if present, permanently delete the object of this generation</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if succeeded</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download">
<code class="descname">download</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.download"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download" title="Permalink to this definition"></a></dt>
<dd><p>Get a file from Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to fetch from.</li>
<li><strong>object</strong> (<em>string</em>) – The object to fetch.</li>
<li><strong>filename</strong> (<em>string</em>) – If set, a local file path where the file should be written to.</li>
</ul>
</td>
</tr>
</tbody>
</table>
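<p>A minimal sketch, assuming the default Google Cloud connection; the bucket,
object and local path are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

# Write the object to a local file.
hook.download(bucket='my-bucket',
              object='data/report.csv',
              filename='/tmp/report.csv')
</pre></div></div>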
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists">
<code class="descname">exists</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists" title="Permalink to this definition"></a></dt>
<dd><p>Checks for the existence of a file in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
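<p>A short sketch, assuming the default Google Cloud connection and placeholder names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

if hook.exists(bucket='my-bucket', object='data/report.csv'):
    print('object is present')
</pre></div></div>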
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google Cloud Storage service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c">
<code class="descname">get_crc32c</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_crc32c"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c" title="Permalink to this definition"></a></dt>
<dd><p>Gets the CRC32c checksum of an object in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash">
<code class="descname">get_md5hash</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_md5hash"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash" title="Permalink to this definition"></a></dt>
<dd><p>Gets the MD5 hash of an object in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size">
<code class="descname">get_size</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size" title="Permalink to this definition"></a></dt>
<dd><p>Gets the size of a file in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
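<p>A short sketch combining this method with the checksum helpers above, assuming
the default Google Cloud connection and placeholder names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

size = hook.get_size(bucket='my-bucket', object='data/report.csv')
crc32c = hook.get_crc32c(bucket='my-bucket', object='data/report.csv')
md5 = hook.get_md5hash(bucket='my-bucket', object='data/report.csv')
print(size, crc32c, md5)
</pre></div></div>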
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after">
<code class="descname">is_updated_after</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>ts</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.is_updated_after"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after" title="Permalink to this definition"></a></dt>
<dd><p>Checks whether an object in Google Cloud Storage was updated after the given timestamp.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
<li><strong>ts</strong> (<em>datetime</em>) – The timestamp to check against.</li>
</ul>
</td>
</tr>
</tbody>
</table>
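<p>A short sketch, assuming the default Google Cloud connection; a naive datetime
is used as the cutoff and the names are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from datetime import datetime, timedelta

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

# Has the object changed within the last 24 hours?
cutoff = datetime.utcnow() - timedelta(days=1)
if hook.is_updated_after(bucket='my-bucket', object='data/report.csv', ts=cutoff):
    print('object changed in the last day')
</pre></div></div>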
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list">
<code class="descname">list</code><span class="sig-paren">(</span><em>bucket</em>, <em>versions=None</em>, <em>maxResults=None</em>, <em>prefix=None</em>, <em>delimiter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list" title="Permalink to this definition"></a></dt>
<dd><p>Lists all objects from the bucket with the given string prefix in the name.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket</strong> (<em>string</em>) – bucket name</li>
<li><strong>versions</strong> (<em>boolean</em>) – if true, list all versions of the objects</li>
<li><strong>maxResults</strong> (<em>integer</em>) – max count of items to return in a single page of responses</li>
<li><strong>prefix</strong> (<em>string</em>) – prefix string which filters objects whose name begin with
this prefix</li>
<li><strong>delimiter</strong> (<em>string</em>) – filters objects based on the delimiter (e.g. ‘.csv’)</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">a stream of object names matching the filtering criteria</p>
</td>
</tr>
</tbody>
</table>
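<p>A short sketch, assuming the default Google Cloud connection; the bucket,
prefix and delimiter are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

# All CSV object names under the 'exports/' prefix.
for name in hook.list(bucket='my-bucket', prefix='exports/', delimiter='.csv'):
    print(name)
</pre></div></div>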
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite">
<code class="descname">rewrite</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.rewrite"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite" title="Permalink to this definition"></a></dt>
<dd><p>Has the same functionality as copy, except that it works on files
over 5 TB, as well as when copying between locations and/or storage
classes.</p>
<p>destination_object can be omitted, in which case source_object is used.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li>
<li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li>
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket the object is copied to.</li>
<li><strong>destination_object</strong> – The (renamed) path of the object if given.
Can be omitted; then the same name is used.</li>
</ul>
</td>
</tr>
</tbody>
</table>
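<p>A short sketch, assuming the default Google Cloud connection; the bucket and
object names are placeholders, and <code class="docutils literal notranslate"><span class="pre">destination_object</span></code> is omitted so the source name is kept:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

# Copy a (possibly very large) object into another bucket, keeping its name.
hook.rewrite(source_bucket='my-source-bucket',
             source_object='exports/large_dump.avro',
             destination_bucket='my-archive-bucket')
</pre></div></div>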
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload">
<code class="descname">upload</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename</em>, <em>mime_type='application/octet-stream'</em>, <em>gzip=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload" title="Permalink to this definition"></a></dt>
<dd><p>Uploads a local file to Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to.</li>
<li><strong>object</strong> (<em>string</em>) – The object name to set when uploading the local file.</li>
<li><strong>filename</strong> (<em>string</em>) – The local file path to the file to be uploaded.</li>
<li><strong>mime_type</strong> (<em>str</em>) – The MIME type to set when uploading the file.</li>
<li><strong>gzip</strong> (<em>bool</em>) – Option to compress file for upload</li>
</ul>
</td>
</tr>
</tbody>
</table>
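<p>A short sketch, assuming the default Google Cloud connection; the paths and
names are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook()  # assumes the default GCP connection

# Upload a local CSV, gzip-compressing it on the way up.
hook.upload(bucket='my-bucket',
            object='data/report.csv',
            filename='/tmp/report.csv',
            mime_type='text/csv',
            gzip=True)
</pre></div></div>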
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="google-kubernetes-engine">
<h3>Google Kubernetes Engine<a class="headerlink" href="#google-kubernetes-engine" title="Permalink to this headline"></a></h3>
<div class="section" id="google-kubernetes-engine-cluster-operators">
<h4>Google Kubernetes Engine Cluster Operators<a class="headerlink" href="#google-kubernetes-engine-cluster-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#id72"><span class="std std-ref">GKEClusterDeleteOperator</span></a> : Creates a Kubernetes Cluster in Google Cloud Platform</li>
<li><a class="reference internal" href="#id73"><span class="std std-ref">Google Kubernetes Engine Hook</span></a> : Deletes a Kubernetes Cluster in Google Cloud Platform</li>
</ul>
<div class="section" id="gkeclustercreateoperator">
<h5>GKEClusterCreateOperator<a class="headerlink" href="#gkeclustercreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_container_operator.</code><code class="descname">GKEClusterCreateOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>location</em>, <em>body={}</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v2'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
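<p>A minimal DAG sketch, assuming the default Google Cloud connection; the
project, location and cluster body are placeholders, with the body following
the google.cloud.container_v1.types.Cluster message:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_container_operator import GKEClusterCreateOperator

dag = DAG('gke_create_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

create_cluster = GKEClusterCreateOperator(
    task_id='create_gke_cluster',
    project_id='my-gcp-project',          # placeholder project
    location='us-central1-a',             # placeholder zone
    body={'name': 'example-cluster',      # placeholder cluster spec
          'initial_node_count': 1},
    dag=dag,
)
</pre></div></div>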
</dd></dl>
</div>
<div class="section" id="gkeclusterdeleteoperator">
<span id="id72"></span><h5>GKEClusterDeleteOperator<a class="headerlink" href="#gkeclusterdeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_container_operator.</code><code class="descname">GKEClusterDeleteOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>name</em>, <em>location</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v2'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<dl class="method">
<dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>This is the main method to derive when creating an operator.
Context is the same dictionary used when rendering Jinja templates.</p>
<p>Refer to get_template_context for more context.</p>
</dd></dl>
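<p>A matching delete task for the sketch above, using the same placeholder names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.gcp_container_operator import GKEClusterDeleteOperator

dag = DAG('gke_delete_example', start_date=datetime(2018, 1, 1),
          schedule_interval=None)

delete_cluster = GKEClusterDeleteOperator(
    task_id='delete_gke_cluster',
    project_id='my-gcp-project',   # placeholder project
    name='example-cluster',        # placeholder cluster name
    location='us-central1-a',      # placeholder zone
    dag=dag,
)
</pre></div></div>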
</dd></dl>
</div>
</div>
<div class="section" id="google-kubernetes-engine-hook">
<span id="id73"></span><h4>Google Kubernetes Engine Hook<a class="headerlink" href="#google-kubernetes-engine-hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_container_hook.</code><code class="descname">GKEClusterHook</code><span class="sig-paren">(</span><em>project_id</em>, <em>location</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.create_cluster">
<code class="descname">create_cluster</code><span class="sig-paren">(</span><em>cluster</em>, <em>retry=&lt;object object&gt;</em>, <em>timeout=&lt;object object&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.create_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.create_cluster" title="Permalink to this definition"></a></dt>
<dd><p>Creates a cluster, consisting of the specified number and type of Google Compute
Engine instances.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>cluster</strong> (<em>dict</em><em> or </em><em>google.cloud.container_v1.types.Cluster</em>) – A Cluster protobuf or dict. If dict is provided, it must be of
the same form as the protobuf message google.cloud.container_v1.types.Cluster</li>
<li><strong>retry</strong> (<em>google.api_core.retry.Retry</em>) – A retry object (google.api_core.retry.Retry) used to retry requests.
If None is specified, requests will not be retried.</li>
<li><strong>timeout</strong> (<em>float</em>) – The amount of time, in seconds, to wait for the request to
complete. Note that if retry is specified, the timeout applies to each
individual attempt.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The full url to the new, or existing, cluster</p>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>:raises</dt>
<dd>ParseError: On JSON parsing problems when trying to convert dict
AirflowException: cluster is not dict type nor Cluster proto type</dd>
</dl>
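<p>A minimal sketch; the project, location and cluster dict are placeholders, and
the dict follows the google.cloud.container_v1.types.Cluster message:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcp_container_hook import GKEClusterHook

hook = GKEClusterHook(project_id='my-gcp-project', location='us-central1-a')

# Returns the full URL of the new (or already existing) cluster.
cluster_url = hook.create_cluster(
    cluster={'name': 'example-cluster', 'initial_node_count': 1})
print(cluster_url)
</pre></div></div>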
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.delete_cluster">
<code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>name</em>, <em>retry=&lt;object object&gt;</em>, <em>timeout=&lt;object object&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.delete_cluster" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the cluster, including the Kubernetes endpoint and all
worker nodes. Firewalls and routes that were configured during
cluster creation are also deleted. Other Google Compute Engine
resources that might be in use by the cluster (e.g. load balancer
resources) will not be deleted if they weren’t present at the
initial create time.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – The name of the cluster to delete</li>
<li><strong>retry</strong> (<em>google.api_core.retry.Retry</em>) – Retry object used to determine when/if to retry requests.
If None is specified, requests will not be retried.</li>
<li><strong>timeout</strong> (<em>float</em>) – The amount of time, in seconds, to wait for the request to
complete. Note that if retry is specified, the timeout applies to each
individual attempt.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The full url to the delete operation if successful, else None</p>
</td>
</tr>
</tbody>
</table>
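<p>A minimal sketch with placeholder project, location and cluster names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcp_container_hook import GKEClusterHook

hook = GKEClusterHook(project_id='my-gcp-project', location='us-central1-a')

# Returns the URL of the delete operation, or None if nothing was deleted.
operation_url = hook.delete_cluster(name='example-cluster')
print(operation_url)
</pre></div></div>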
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_cluster">
<code class="descname">get_cluster</code><span class="sig-paren">(</span><em>name</em>, <em>retry=&lt;object object&gt;</em>, <em>timeout=&lt;object object&gt;</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.get_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_cluster" title="Permalink to this definition"></a></dt>
<dd><p>Gets details of the specified cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – The name of the cluster to retrieve</li>
<li><strong>retry</strong> (<em>google.api_core.retry.Retry</em>) – A retry object used to retry requests. If None is specified,
requests will not be retried.</li>
<li><strong>timeout</strong> (<em>float</em>) – The amount of time, in seconds, to wait for the request to
complete. Note that if retry is specified, the timeout applies to each
individual attempt.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A google.cloud.container_v1.types.Cluster instance</p>
</td>
</tr>
</tbody>
</table>
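<p>A minimal sketch with placeholder names; the attribute read at the end assumes
the fields of the google.cloud.container_v1.types.Cluster message:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcp_container_hook import GKEClusterHook

hook = GKEClusterHook(project_id='my-gcp-project', location='us-central1-a')

cluster = hook.get_cluster(name='example-cluster')
print(cluster.endpoint)  # e.g. the cluster master endpoint
</pre></div></div>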
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_operation">
<code class="descname">get_operation</code><span class="sig-paren">(</span><em>operation_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_operation" title="Permalink to this definition"></a></dt>
<dd><p>Fetches the operation from Google Cloud</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>operation_name</strong> (<em>str</em>) – Name of operation to fetch</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The new, updated operation from Google Cloud</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.wait_for_operation">
<code class="descname">wait_for_operation</code><span class="sig-paren">(</span><em>operation</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.wait_for_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.wait_for_operation" title="Permalink to this definition"></a></dt>
<dd><p>Given an operation, continuously fetches the status from Google Cloud until it
either completes or an error occurs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>operation</strong> (<em>A google.cloud.container_V1.gapic.enums.Operator</em>) – The Operation to wait for</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A new, updated operation fetched from Google Cloud</td>
</tr>
</tbody>
</table>
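<p>A minimal sketch combining this method with <code class="docutils literal notranslate"><span class="pre">get_operation</span></code>; the operation
name is a placeholder, e.g. one returned by a previous API call:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.gcp_container_hook import GKEClusterHook

hook = GKEClusterHook(project_id='my-gcp-project', location='us-central1-a')

# Fetch an in-flight operation by name (placeholder), then block until it finishes.
operation = hook.get_operation('operation-1234567890-abcdef')
finished = hook.wait_for_operation(operation)
print(finished.status)
</pre></div></div>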
</dd></dl>
</dd></dl>
</div>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="lineage.html" class="btn btn-neutral float-right" title="Lineage" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="api.html" class="btn btn-neutral" title="Experimental Rest API" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>